Diffstat (limited to 'llvm/test')
70 files changed, 30514 insertions, 7252 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll
index fa53a18..1920fc9 100644
--- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll
@@ -1,17 +1,6 @@
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=256 | FileCheck %s -D#VBITS=256
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=384 | FileCheck %s -D#VBITS=256
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=512 | FileCheck %s -D#VBITS=512
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=640 | FileCheck %s -D#VBITS=512
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=768 | FileCheck %s -D#VBITS=512
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=896 | FileCheck %s -D#VBITS=512
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1024 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1152 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1280 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1408 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1536 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1664 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1792 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1920 | FileCheck %s -D#VBITS=1024
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=2048 | FileCheck %s -D#VBITS=2048
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll
index df40a96..e128987 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll
@@ -1,19 +1,8 @@
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -D#VBITS=128
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=128 | FileCheck %s -D#VBITS=128
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=256 | FileCheck %s -D#VBITS=256
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=384 | FileCheck %s -D#VBITS=256
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=512 | FileCheck %s -D#VBITS=512
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=640 | FileCheck %s -D#VBITS=512
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=768 | FileCheck %s -D#VBITS=512
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=896 | FileCheck %s -D#VBITS=512
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1024 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1152 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1280 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1408 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1536 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1664 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1792 | FileCheck %s -D#VBITS=1024
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1920 | FileCheck %s -D#VBITS=1024
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=2048 | FileCheck %s -D#VBITS=2048
 
 ; VBITS represents the useful bit size of a vector register from the code
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index b54f262..4894932 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -755,199 +755,117 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none
 ; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    cbz w2, .LBB6_3
 ; CHECK-SD-NEXT:  // %bb.1: // %iter.check
-; CHECK-SD-NEXT:    str x25, [sp, #-64]!
// 8-byte Folded Spill -; CHECK-SD-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64 -; CHECK-SD-NEXT:    .cfi_offset w19, -8 -; CHECK-SD-NEXT:    .cfi_offset w20, -16 -; CHECK-SD-NEXT:    .cfi_offset w21, -24 -; CHECK-SD-NEXT:    .cfi_offset w22, -32 -; CHECK-SD-NEXT:    .cfi_offset w23, -40 -; CHECK-SD-NEXT:    .cfi_offset w24, -48 -; CHECK-SD-NEXT:    .cfi_offset w25, -64 -; CHECK-SD-NEXT:    sxtb x9, w1  ; CHECK-SD-NEXT:    cmp w2, #3 -; CHECK-SD-NEXT:    mov w10, w2 +; CHECK-SD-NEXT:    mov w9, w2  ; CHECK-SD-NEXT:    b.hi .LBB6_4  ; CHECK-SD-NEXT:  // %bb.2: -; CHECK-SD-NEXT:    mov x11, xzr +; CHECK-SD-NEXT:    mov x10, xzr  ; CHECK-SD-NEXT:    mov x8, xzr  ; CHECK-SD-NEXT:    b .LBB6_13  ; CHECK-SD-NEXT:  .LBB6_3: -; CHECK-SD-NEXT:    mov x0, xzr +; CHECK-SD-NEXT:    mov x8, xzr +; CHECK-SD-NEXT:    mov x0, x8  ; CHECK-SD-NEXT:    ret  ; CHECK-SD-NEXT:  .LBB6_4: // %vector.main.loop.iter.check -; CHECK-SD-NEXT:    dup v0.2d, x9  ; CHECK-SD-NEXT:    cmp w2, #16  ; CHECK-SD-NEXT:    b.hs .LBB6_6  ; CHECK-SD-NEXT:  // %bb.5: -; CHECK-SD-NEXT:    mov x11, xzr +; CHECK-SD-NEXT:    mov x10, xzr  ; CHECK-SD-NEXT:    mov x8, xzr  ; CHECK-SD-NEXT:    b .LBB6_10  ; CHECK-SD-NEXT:  .LBB6_6: // %vector.ph +; CHECK-SD-NEXT:    mov w8, w1 +; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000  ; CHECK-SD-NEXT:    movi v1.2d, #0000000000000000 -; CHECK-SD-NEXT:    mov x8, v0.d[1] -; CHECK-SD-NEXT:    and x12, x10, #0xc +; CHECK-SD-NEXT:    sxtb x8, w8 +; CHECK-SD-NEXT:    movi v3.2d, #0000000000000000  ; CHECK-SD-NEXT:    movi v2.2d, #0000000000000000 +; CHECK-SD-NEXT:    movi v6.2d, #0000000000000000  ; CHECK-SD-NEXT:    movi v4.2d, #0000000000000000 -; CHECK-SD-NEXT:    and x11, x10, #0xfffffff0 -; CHECK-SD-NEXT:    movi v3.2d, #0000000000000000 +; CHECK-SD-NEXT:    and x11, x9, #0xc  ; CHECK-SD-NEXT:    movi v7.2d, #0000000000000000 -; CHECK-SD-NEXT:    mov x15, x0  ; CHECK-SD-NEXT:    movi v5.2d, #0000000000000000 -; CHECK-SD-NEXT:    movi v16.2d, #0000000000000000 -; CHECK-SD-NEXT:    and x16, x10, #0xfffffff0 -; CHECK-SD-NEXT:    movi v6.2d, #0000000000000000 -; CHECK-SD-NEXT:    fmov x13, d0 -; CHECK-SD-NEXT:    fmov x14, d0 +; CHECK-SD-NEXT:    and x10, x9, #0xfffffff0 +; CHECK-SD-NEXT:    dup v16.4s, w8 +; CHECK-SD-NEXT:    mov x8, x0 +; CHECK-SD-NEXT:    and x12, x9, #0xfffffff0  ; CHECK-SD-NEXT:  .LBB6_7: // %vector.body  ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1 -; CHECK-SD-NEXT:    ldr q17, [x15], #16 -; CHECK-SD-NEXT:    subs x16, x16, #16 +; CHECK-SD-NEXT:    ldr q17, [x8], #16 +; CHECK-SD-NEXT:    subs x12, x12, #16  ; CHECK-SD-NEXT:    ushll v18.8h, v17.8b, #0 -; CHECK-SD-NEXT:    ushll2 v19.8h, v17.16b, #0 -; CHECK-SD-NEXT:    ushll v17.4s, v18.4h, #0 -; CHECK-SD-NEXT:    ushll2 v20.4s, v19.8h, #0 -; CHECK-SD-NEXT:    ushll2 v18.4s, v18.8h, #0 -; CHECK-SD-NEXT:    ushll v19.4s, v19.4h, #0 -; CHECK-SD-NEXT:    ushll v21.2d, v17.2s, #0 -; CHECK-SD-NEXT:    ushll2 v22.2d, v20.4s, #0 -; CHECK-SD-NEXT:    ushll2 v17.2d, v17.4s, #0 -; CHECK-SD-NEXT:    ushll v23.2d, v18.2s, #0 -; CHECK-SD-NEXT:    ushll v20.2d, v20.2s, #0 -; CHECK-SD-NEXT:    ushll2 v18.2d, v18.4s, #0 -; CHECK-SD-NEXT:    fmov x17, d21 -; CHECK-SD-NEXT:    mov x2, v21.d[1] -; CHECK-SD-NEXT:    ushll v21.2d, v19.2s, #0 -; CHECK-SD-NEXT:    ushll2 v19.2d, v19.4s, #0 -; CHECK-SD-NEXT:    fmov x18, d22 -; CHECK-SD-NEXT:    
fmov x1, d17 -; CHECK-SD-NEXT:    fmov x3, d23 -; CHECK-SD-NEXT:    fmov x21, d20 -; CHECK-SD-NEXT:    fmov x22, d18 -; CHECK-SD-NEXT:    fmov x19, d21 -; CHECK-SD-NEXT:    mul x17, x13, x17 -; CHECK-SD-NEXT:    mov x4, v22.d[1] -; CHECK-SD-NEXT:    fmov x24, d19 -; CHECK-SD-NEXT:    mov x5, v23.d[1] -; CHECK-SD-NEXT:    mov x6, v21.d[1] -; CHECK-SD-NEXT:    mov x7, v20.d[1] -; CHECK-SD-NEXT:    mov x20, v18.d[1] -; CHECK-SD-NEXT:    mov x23, v19.d[1] -; CHECK-SD-NEXT:    mov x25, v17.d[1] -; CHECK-SD-NEXT:    mul x18, x14, x18 -; CHECK-SD-NEXT:    mul x1, x13, x1 -; CHECK-SD-NEXT:    fmov d17, x17 -; CHECK-SD-NEXT:    mul x3, x13, x3 -; CHECK-SD-NEXT:    fmov d18, x18 -; CHECK-SD-NEXT:    mul x19, x13, x19 -; CHECK-SD-NEXT:    fmov d19, x1 -; CHECK-SD-NEXT:    mul x21, x13, x21 -; CHECK-SD-NEXT:    fmov d20, x3 -; CHECK-SD-NEXT:    mul x22, x13, x22 -; CHECK-SD-NEXT:    fmov d21, x19 -; CHECK-SD-NEXT:    mul x24, x13, x24 -; CHECK-SD-NEXT:    fmov d24, x21 -; CHECK-SD-NEXT:    mul x2, x8, x2 -; CHECK-SD-NEXT:    fmov d22, x22 -; CHECK-SD-NEXT:    mul x4, x8, x4 -; CHECK-SD-NEXT:    fmov d23, x24 -; CHECK-SD-NEXT:    mul x5, x8, x5 -; CHECK-SD-NEXT:    mov v17.d[1], x2 -; CHECK-SD-NEXT:    mul x6, x8, x6 -; CHECK-SD-NEXT:    mov v18.d[1], x4 -; CHECK-SD-NEXT:    mul x7, x8, x7 -; CHECK-SD-NEXT:    mov v20.d[1], x5 -; CHECK-SD-NEXT:    add v1.2d, v17.2d, v1.2d -; CHECK-SD-NEXT:    mul x20, x8, x20 -; CHECK-SD-NEXT:    mov v21.d[1], x6 -; CHECK-SD-NEXT:    add v6.2d, v18.2d, v6.2d -; CHECK-SD-NEXT:    mul x23, x8, x23 -; CHECK-SD-NEXT:    mov v24.d[1], x7 -; CHECK-SD-NEXT:    add v4.2d, v20.2d, v4.2d -; CHECK-SD-NEXT:    mul x17, x8, x25 -; CHECK-SD-NEXT:    mov v22.d[1], x20 -; CHECK-SD-NEXT:    add v7.2d, v21.2d, v7.2d -; CHECK-SD-NEXT:    mov v23.d[1], x23 -; CHECK-SD-NEXT:    add v16.2d, v24.2d, v16.2d -; CHECK-SD-NEXT:    mov v19.d[1], x17 -; CHECK-SD-NEXT:    add v3.2d, v22.2d, v3.2d -; CHECK-SD-NEXT:    add v5.2d, v23.2d, v5.2d -; CHECK-SD-NEXT:    add v2.2d, v19.2d, v2.2d +; CHECK-SD-NEXT:    ushll2 v17.8h, v17.16b, #0 +; CHECK-SD-NEXT:    ushll2 v19.4s, v18.8h, #0 +; CHECK-SD-NEXT:    ushll v20.4s, v17.4h, #0 +; CHECK-SD-NEXT:    ushll v18.4s, v18.4h, #0 +; CHECK-SD-NEXT:    ushll2 v17.4s, v17.8h, #0 +; CHECK-SD-NEXT:    smlal2 v2.2d, v16.4s, v19.4s +; CHECK-SD-NEXT:    smlal2 v4.2d, v16.4s, v20.4s +; CHECK-SD-NEXT:    smlal v6.2d, v16.2s, v20.2s +; CHECK-SD-NEXT:    smlal v3.2d, v16.2s, v19.2s +; CHECK-SD-NEXT:    smlal2 v1.2d, v16.4s, v18.4s +; CHECK-SD-NEXT:    smlal v7.2d, v16.2s, v17.2s +; CHECK-SD-NEXT:    smlal v0.2d, v16.2s, v18.2s +; CHECK-SD-NEXT:    smlal2 v5.2d, v16.4s, v17.4s  ; CHECK-SD-NEXT:    b.ne .LBB6_7  ; CHECK-SD-NEXT:  // %bb.8: // %middle.block -; CHECK-SD-NEXT:    add v1.2d, v1.2d, v7.2d -; CHECK-SD-NEXT:    add v4.2d, v4.2d, v16.2d -; CHECK-SD-NEXT:    cmp x11, x10 -; CHECK-SD-NEXT:    add v2.2d, v2.2d, v5.2d -; CHECK-SD-NEXT:    add v3.2d, v3.2d, v6.2d +; CHECK-SD-NEXT:    add v0.2d, v0.2d, v6.2d +; CHECK-SD-NEXT:    add v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT:    cmp x10, x9  ; CHECK-SD-NEXT:    add v1.2d, v1.2d, v4.2d -; CHECK-SD-NEXT:    add v2.2d, v2.2d, v3.2d +; CHECK-SD-NEXT:    add v2.2d, v2.2d, v5.2d +; CHECK-SD-NEXT:    add v0.2d, v0.2d, v3.2d  ; CHECK-SD-NEXT:    add v1.2d, v1.2d, v2.2d -; CHECK-SD-NEXT:    addp d1, v1.2d -; CHECK-SD-NEXT:    fmov x8, d1 +; CHECK-SD-NEXT:    add v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT:    addp d0, v0.2d +; CHECK-SD-NEXT:    fmov x8, d0  ; CHECK-SD-NEXT:    b.eq .LBB6_15  ; CHECK-SD-NEXT:  // %bb.9: // 
%vec.epilog.iter.check -; CHECK-SD-NEXT:    cbz x12, .LBB6_13 +; CHECK-SD-NEXT:    cbz x11, .LBB6_13  ; CHECK-SD-NEXT:  .LBB6_10: // %vec.epilog.ph +; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000 +; CHECK-SD-NEXT:    mov w11, w1  ; CHECK-SD-NEXT:    movi v1.2d, #0000000000000000 -; CHECK-SD-NEXT:    movi v2.2d, #0000000000000000 -; CHECK-SD-NEXT:    mov x13, x11 +; CHECK-SD-NEXT:    sxtb x11, w11  ; CHECK-SD-NEXT:    movi v3.2d, #0x000000000000ff -; CHECK-SD-NEXT:    fmov x14, d0 -; CHECK-SD-NEXT:    and x11, x10, #0xfffffffc -; CHECK-SD-NEXT:    fmov x15, d0 -; CHECK-SD-NEXT:    sub x12, x13, x11 -; CHECK-SD-NEXT:    add x13, x0, x13 -; CHECK-SD-NEXT:    mov v1.d[0], x8 -; CHECK-SD-NEXT:    mov x8, v0.d[1] +; CHECK-SD-NEXT:    dup v2.2s, w11 +; CHECK-SD-NEXT:    mov x11, x10 +; CHECK-SD-NEXT:    and x10, x9, #0xfffffffc +; CHECK-SD-NEXT:    mov v0.d[0], x8 +; CHECK-SD-NEXT:    sub x8, x11, x10 +; CHECK-SD-NEXT:    add x11, x0, x11  ; CHECK-SD-NEXT:  .LBB6_11: // %vec.epilog.vector.body  ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1 -; CHECK-SD-NEXT:    ldr s0, [x13], #4 -; CHECK-SD-NEXT:    adds x12, x12, #4 -; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0 -; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0 -; CHECK-SD-NEXT:    ushll v4.2d, v0.2s, #0 -; CHECK-SD-NEXT:    ushll2 v0.2d, v0.4s, #0 +; CHECK-SD-NEXT:    ldr s4, [x11], #4 +; CHECK-SD-NEXT:    adds x8, x8, #4 +; CHECK-SD-NEXT:    ushll v4.8h, v4.8b, #0 +; CHECK-SD-NEXT:    ushll v4.4s, v4.4h, #0 +; CHECK-SD-NEXT:    ushll v5.2d, v4.2s, #0 +; CHECK-SD-NEXT:    ushll2 v4.2d, v4.4s, #0 +; CHECK-SD-NEXT:    and v5.16b, v5.16b, v3.16b  ; CHECK-SD-NEXT:    and v4.16b, v4.16b, v3.16b -; CHECK-SD-NEXT:    and v0.16b, v0.16b, v3.16b -; CHECK-SD-NEXT:    fmov x16, d4 -; CHECK-SD-NEXT:    fmov x18, d0 -; CHECK-SD-NEXT:    mov x17, v4.d[1] -; CHECK-SD-NEXT:    mov x1, v0.d[1] -; CHECK-SD-NEXT:    mul x16, x14, x16 -; CHECK-SD-NEXT:    mul x18, x15, x18 -; CHECK-SD-NEXT:    mul x17, x8, x17 -; CHECK-SD-NEXT:    fmov d0, x16 -; CHECK-SD-NEXT:    mul x1, x8, x1 -; CHECK-SD-NEXT:    fmov d4, x18 -; CHECK-SD-NEXT:    mov v0.d[1], x17 -; CHECK-SD-NEXT:    mov v4.d[1], x1 -; CHECK-SD-NEXT:    add v1.2d, v0.2d, v1.2d -; CHECK-SD-NEXT:    add v2.2d, v4.2d, v2.2d +; CHECK-SD-NEXT:    xtn v5.2s, v5.2d +; CHECK-SD-NEXT:    xtn v4.2s, v4.2d +; CHECK-SD-NEXT:    smlal v1.2d, v2.2s, v4.2s +; CHECK-SD-NEXT:    smlal v0.2d, v2.2s, v5.2s  ; CHECK-SD-NEXT:    b.ne .LBB6_11  ; CHECK-SD-NEXT:  // %bb.12: // %vec.epilog.middle.block -; CHECK-SD-NEXT:    add v0.2d, v1.2d, v2.2d -; CHECK-SD-NEXT:    cmp x11, x10 +; CHECK-SD-NEXT:    add v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT:    cmp x10, x9  ; CHECK-SD-NEXT:    addp d0, v0.2d  ; CHECK-SD-NEXT:    fmov x8, d0  ; CHECK-SD-NEXT:    b.eq .LBB6_15  ; CHECK-SD-NEXT:  .LBB6_13: // %for.body.preheader -; CHECK-SD-NEXT:    sub x10, x10, x11 -; CHECK-SD-NEXT:    add x11, x0, x11 +; CHECK-SD-NEXT:    sxtb x11, w1 +; CHECK-SD-NEXT:    sub x9, x9, x10 +; CHECK-SD-NEXT:    add x10, x0, x10  ; CHECK-SD-NEXT:  .LBB6_14: // %for.body  ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1 -; CHECK-SD-NEXT:    ldrb w12, [x11], #1 -; CHECK-SD-NEXT:    subs x10, x10, #1 -; CHECK-SD-NEXT:    smaddl x8, w12, w9, x8 +; CHECK-SD-NEXT:    ldrb w12, [x10], #1 +; CHECK-SD-NEXT:    subs x9, x9, #1 +; CHECK-SD-NEXT:    smaddl x8, w12, w11, x8  ; CHECK-SD-NEXT:    b.ne .LBB6_14 -; CHECK-SD-NEXT:  .LBB6_15: -; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded 
Reload -; CHECK-SD-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT:    ldr x25, [sp], #64 // 8-byte Folded Reload +; CHECK-SD-NEXT:  .LBB6_15: // %for.cond.cleanup  ; CHECK-SD-NEXT:    mov x0, x8  ; CHECK-SD-NEXT:    ret  ; @@ -957,63 +875,64 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none  ; CHECK-GI-NEXT:    cbz w2, .LBB6_7  ; CHECK-GI-NEXT:  // %bb.1: // %iter.check  ; CHECK-GI-NEXT:    movi d0, #0000000000000000 -; CHECK-GI-NEXT:    sxtb x9, w1 -; CHECK-GI-NEXT:    mov x11, xzr +; CHECK-GI-NEXT:    mov x10, xzr  ; CHECK-GI-NEXT:    cmp w2, #4 -; CHECK-GI-NEXT:    mov w10, w2 +; CHECK-GI-NEXT:    mov w9, w2  ; CHECK-GI-NEXT:    b.lo .LBB6_12  ; CHECK-GI-NEXT:  // %bb.2: // %vector.main.loop.iter.check  ; CHECK-GI-NEXT:    movi d0, #0000000000000000 -; CHECK-GI-NEXT:    dup v1.2d, x9 -; CHECK-GI-NEXT:    mov x11, xzr +; CHECK-GI-NEXT:    mov x10, xzr  ; CHECK-GI-NEXT:    cmp w2, #16  ; CHECK-GI-NEXT:    b.lo .LBB6_9  ; CHECK-GI-NEXT:  // %bb.3: // %vector.ph +; CHECK-GI-NEXT:    mov w8, w1  ; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000 -; CHECK-GI-NEXT:    xtn v2.2s, v1.2d -; CHECK-GI-NEXT:    and x8, x10, #0xc +; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000 +; CHECK-GI-NEXT:    sxtb x8, w8 +; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000  ; CHECK-GI-NEXT:    movi v3.2d, #0000000000000000  ; CHECK-GI-NEXT:    movi v4.2d, #0000000000000000 -; CHECK-GI-NEXT:    and x11, x10, #0xfffffff0 -; CHECK-GI-NEXT:    movi v5.2d, #0000000000000000  ; CHECK-GI-NEXT:    movi v6.2d, #0000000000000000 -; CHECK-GI-NEXT:    mov x12, x0 +; CHECK-GI-NEXT:    and x10, x9, #0xfffffff0 +; CHECK-GI-NEXT:    dup v5.2d, x8  ; CHECK-GI-NEXT:    movi v7.2d, #0000000000000000 -; CHECK-GI-NEXT:    movi v16.2d, #0000000000000000 -; CHECK-GI-NEXT:    and x13, x10, #0xfffffff0 -; CHECK-GI-NEXT:    movi v17.2d, #0000000000000000 +; CHECK-GI-NEXT:    and x8, x9, #0xc +; CHECK-GI-NEXT:    mov x11, x0 +; CHECK-GI-NEXT:    and x12, x9, #0xfffffff0 +; CHECK-GI-NEXT:    xtn v16.2s, v5.2d +; CHECK-GI-NEXT:    movi v5.2d, #0000000000000000  ; CHECK-GI-NEXT:  .LBB6_4: // %vector.body  ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NEXT:    ldr q18, [x12], #16 -; CHECK-GI-NEXT:    subs x13, x13, #16 -; CHECK-GI-NEXT:    ushll v19.8h, v18.8b, #0 -; CHECK-GI-NEXT:    ushll2 v18.8h, v18.16b, #0 -; CHECK-GI-NEXT:    ushll v20.4s, v19.4h, #0 -; CHECK-GI-NEXT:    ushll2 v19.4s, v19.8h, #0 -; CHECK-GI-NEXT:    ushll v21.4s, v18.4h, #0 +; CHECK-GI-NEXT:    ldr q17, [x11], #16 +; CHECK-GI-NEXT:    subs x12, x12, #16 +; CHECK-GI-NEXT:    ushll v18.8h, v17.8b, #0 +; CHECK-GI-NEXT:    ushll2 v17.8h, v17.16b, #0 +; CHECK-GI-NEXT:    ushll v19.4s, v18.4h, #0  ; CHECK-GI-NEXT:    ushll2 v18.4s, v18.8h, #0 -; CHECK-GI-NEXT:    mov d22, v20.d[1] -; CHECK-GI-NEXT:    mov d23, v19.d[1] -; CHECK-GI-NEXT:    mov d24, v21.d[1] -; CHECK-GI-NEXT:    mov d25, v18.d[1] -; CHECK-GI-NEXT:    smlal v0.2d, v2.2s, v20.2s -; CHECK-GI-NEXT:    smlal v4.2d, v2.2s, v19.2s -; CHECK-GI-NEXT:    smlal v6.2d, v2.2s, v21.2s -; CHECK-GI-NEXT:    smlal v16.2d, v2.2s, v18.2s -; CHECK-GI-NEXT:    smlal v3.2d, v2.2s, v22.2s -; CHECK-GI-NEXT:    smlal v5.2d, v2.2s, v23.2s -; CHECK-GI-NEXT:    smlal v7.2d, v2.2s, v24.2s -; CHECK-GI-NEXT:    smlal v17.2d, v2.2s, v25.2s +; CHECK-GI-NEXT:    ushll v20.4s, v17.4h, #0 +; CHECK-GI-NEXT:    ushll2 v17.4s, v17.8h, #0 +; CHECK-GI-NEXT:    mov d21, v19.d[1] +; CHECK-GI-NEXT:    mov d22, v18.d[1] +; CHECK-GI-NEXT:    mov d23, v20.d[1] +; 
CHECK-GI-NEXT:    mov d24, v17.d[1] +; CHECK-GI-NEXT:    smlal v0.2d, v16.2s, v19.2s +; CHECK-GI-NEXT:    smlal v2.2d, v16.2s, v18.2s +; CHECK-GI-NEXT:    smlal v4.2d, v16.2s, v20.2s +; CHECK-GI-NEXT:    smlal v6.2d, v16.2s, v17.2s +; CHECK-GI-NEXT:    smlal v1.2d, v16.2s, v21.2s +; CHECK-GI-NEXT:    smlal v3.2d, v16.2s, v22.2s +; CHECK-GI-NEXT:    smlal v5.2d, v16.2s, v23.2s +; CHECK-GI-NEXT:    smlal v7.2d, v16.2s, v24.2s  ; CHECK-GI-NEXT:    b.ne .LBB6_4  ; CHECK-GI-NEXT:  // %bb.5: // %middle.block -; CHECK-GI-NEXT:    add v0.2d, v0.2d, v3.2d +; CHECK-GI-NEXT:    add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT:    add v1.2d, v2.2d, v3.2d +; CHECK-GI-NEXT:    cmp x10, x9  ; CHECK-GI-NEXT:    add v2.2d, v4.2d, v5.2d -; CHECK-GI-NEXT:    cmp x11, x10  ; CHECK-GI-NEXT:    add v3.2d, v6.2d, v7.2d -; CHECK-GI-NEXT:    add v4.2d, v16.2d, v17.2d -; CHECK-GI-NEXT:    add v0.2d, v0.2d, v2.2d -; CHECK-GI-NEXT:    add v2.2d, v3.2d, v4.2d -; CHECK-GI-NEXT:    add v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT:    add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT:    add v1.2d, v2.2d, v3.2d +; CHECK-GI-NEXT:    add v0.2d, v0.2d, v1.2d  ; CHECK-GI-NEXT:    addp d0, v0.2d  ; CHECK-GI-NEXT:    b.ne .LBB6_8  ; CHECK-GI-NEXT:  // %bb.6: @@ -1027,50 +946,54 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none  ; CHECK-GI-NEXT:  .LBB6_8: // %vec.epilog.iter.check  ; CHECK-GI-NEXT:    cbz x8, .LBB6_12  ; CHECK-GI-NEXT:  .LBB6_9: // %vec.epilog.ph +; CHECK-GI-NEXT:    mov w8, w1  ; CHECK-GI-NEXT:    mov v0.d[1], xzr -; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000 -; CHECK-GI-NEXT:    mov x12, x11 -; CHECK-GI-NEXT:    xtn v1.2s, v1.2d -; CHECK-GI-NEXT:    and x11, x10, #0xfffffffc -; CHECK-GI-NEXT:    sub x8, x12, x11 -; CHECK-GI-NEXT:    add x12, x0, x12 +; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000 +; CHECK-GI-NEXT:    sxtb x8, w8 +; CHECK-GI-NEXT:    mov x11, x10 +; CHECK-GI-NEXT:    and x10, x9, #0xfffffffc +; CHECK-GI-NEXT:    dup v2.2d, x8 +; CHECK-GI-NEXT:    sub x8, x11, x10 +; CHECK-GI-NEXT:    add x11, x0, x11 +; CHECK-GI-NEXT:    xtn v2.2s, v2.2d  ; CHECK-GI-NEXT:  .LBB6_10: // %vec.epilog.vector.body  ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NEXT:    ldr w13, [x12], #4 +; CHECK-GI-NEXT:    ldr w12, [x11], #4  ; CHECK-GI-NEXT:    adds x8, x8, #4 -; CHECK-GI-NEXT:    fmov s3, w13 -; CHECK-GI-NEXT:    uxtb w13, w13 +; CHECK-GI-NEXT:    fmov s3, w12 +; CHECK-GI-NEXT:    uxtb w12, w12  ; CHECK-GI-NEXT:    mov b4, v3.b[2]  ; CHECK-GI-NEXT:    mov b5, v3.b[1]  ; CHECK-GI-NEXT:    mov b6, v3.b[3] -; CHECK-GI-NEXT:    fmov s3, w13 -; CHECK-GI-NEXT:    fmov w14, s4 -; CHECK-GI-NEXT:    fmov w15, s5 -; CHECK-GI-NEXT:    fmov w16, s6 +; CHECK-GI-NEXT:    fmov s3, w12 +; CHECK-GI-NEXT:    fmov w13, s4 +; CHECK-GI-NEXT:    fmov w14, s5 +; CHECK-GI-NEXT:    fmov w15, s6 +; CHECK-GI-NEXT:    uxtb w13, w13  ; CHECK-GI-NEXT:    uxtb w14, w14  ; CHECK-GI-NEXT:    uxtb w15, w15 -; CHECK-GI-NEXT:    uxtb w16, w16 -; CHECK-GI-NEXT:    fmov s4, w14 -; CHECK-GI-NEXT:    mov v3.s[1], w15 -; CHECK-GI-NEXT:    mov v4.s[1], w16 -; CHECK-GI-NEXT:    smlal v0.2d, v1.2s, v3.2s -; CHECK-GI-NEXT:    smlal v2.2d, v1.2s, v4.2s +; CHECK-GI-NEXT:    fmov s4, w13 +; CHECK-GI-NEXT:    mov v3.s[1], w14 +; CHECK-GI-NEXT:    mov v4.s[1], w15 +; CHECK-GI-NEXT:    smlal v0.2d, v2.2s, v3.2s +; CHECK-GI-NEXT:    smlal v1.2d, v2.2s, v4.2s  ; CHECK-GI-NEXT:    b.ne .LBB6_10  ; CHECK-GI-NEXT:  // %bb.11: // %vec.epilog.middle.block -; CHECK-GI-NEXT:    add v0.2d, v0.2d, v2.2d -; CHECK-GI-NEXT:    cmp x11, 
x10 +; CHECK-GI-NEXT:    add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT:    cmp x10, x9  ; CHECK-GI-NEXT:    addp d0, v0.2d  ; CHECK-GI-NEXT:    fmov x8, d0  ; CHECK-GI-NEXT:    b.eq .LBB6_14  ; CHECK-GI-NEXT:  .LBB6_12: // %for.body.preheader -; CHECK-GI-NEXT:    sub x10, x10, x11 -; CHECK-GI-NEXT:    add x11, x0, x11 +; CHECK-GI-NEXT:    sxtb x11, w1 +; CHECK-GI-NEXT:    sub x9, x9, x10 +; CHECK-GI-NEXT:    add x10, x0, x10  ; CHECK-GI-NEXT:  .LBB6_13: // %for.body  ; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NEXT:    ldrb w8, [x11], #1 +; CHECK-GI-NEXT:    ldrb w8, [x10], #1  ; CHECK-GI-NEXT:    fmov x12, d0 -; CHECK-GI-NEXT:    subs x10, x10, #1 -; CHECK-GI-NEXT:    madd x8, x8, x9, x12 +; CHECK-GI-NEXT:    subs x9, x9, #1 +; CHECK-GI-NEXT:    madd x8, x8, x11, x12  ; CHECK-GI-NEXT:    fmov d0, x8  ; CHECK-GI-NEXT:    b.ne .LBB6_13  ; CHECK-GI-NEXT:  .LBB6_14: // %for.cond.cleanup diff --git a/llvm/test/CodeGen/AArch64/cfguard-arm64ec.ll b/llvm/test/CodeGen/AArch64/cfguard-arm64ec.ll index bdbc99e..75e7ac90 100644 --- a/llvm/test/CodeGen/AArch64/cfguard-arm64ec.ll +++ b/llvm/test/CodeGen/AArch64/cfguard-arm64ec.ll @@ -2,15 +2,58 @@  declare void @called()  declare void @escaped() -define void @f(ptr %dst) { +define void @f(ptr %dst, ptr readonly %f) {    call void @called() +; CHECK:         bl      "#called"    store ptr @escaped, ptr %dst -  ret void +  call void %f() +; CHECK:       adrp    x10, $iexit_thunk$cdecl$v$v +; CHECK-NEXT:  add     x10, x10, :lo12:$iexit_thunk$cdecl$v$v +; CHECK-NEXT:  str     x8, [x20] +; CHECK-NEXT:  adrp    x8, __os_arm64x_check_icall_cfg +; CHECK-NEXT:  ldr     x8, [x8, :lo12:__os_arm64x_check_icall_cfg] +; CHECK-NEXT:  mov     x11, +; CHECK-NEXT:  blr     x8 +; CHECK-NEXT:  blr     x11 +    ret void  } +; CHECK-LABEL:    .def "#called$exit_thunk"; +; CHECK-NEXT:     .scl 2; +; CHECK-NEXT:     .type 32; +; CHECK-NEXT:     .endef +; CHECK-NEXT:     .section .wowthk$aa,"xr",discard,"#called$exit_thunk" +; CHECK-NEXT:     .globl "#called$exit_thunk"            // -- Begin function #called$exit_thunk +; CHECK-NEXT:     .p2align 2 +; CHECK-NEXT: "#called$exit_thunk":                   // @"#called$exit_thunk" +; CHECK-NEXT:     .weak_anti_dep called +; CHECK-NEXT: called = "#called" +; CHECK-NEXT:     .weak_anti_dep "#called" +; CHECK-NEXT: "#called" = "#called$exit_thunk" +; CHECK-NEXT:    .seh_proc "#called$exit_thunk" +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT:     str x30, [sp, #-16]!                
// 8-byte Folded Spill +; CHECK-NEXT:     .seh_save_reg_x x30, 16 +; CHECK-NEXT:     .seh_endprologue +; CHECK-NEXT:     adrp x8, __os_arm64x_check_icall +; CHECK-NEXT:     adrp x11, called +; CHECK-NEXT:     add x11, x11, :lo12:called +; CHECK-NEXT:     ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT:     adrp x10, $iexit_thunk$cdecl$v$v +; CHECK-NEXT:     add x10, x10, :lo12:$iexit_thunk$cdecl$v$v +; CHECK-NEXT:     blr x8 +; CHECK-NEXT:     .seh_startepilogue +; CHECK-NEXT:     ldr x30, [sp], #16                  // 8-byte Folded Reload +; CHECK-NEXT:     .seh_save_reg_x x30, 16 +; CHECK-NEXT:     .seh_endepilogue +; CHECK-NEXT:     br x11 +; CHECK-NEXT:     .seh_endfunclet +; CHECK-NEXT:     .seh_endproc +  !llvm.module.flags = !{!0} -!0 = !{i32 2, !"cfguard", i32 1} +!0 = !{i32 2, !"cfguard", i32 2}  ; CHECK-LABEL: .section .gfids$y,"dr"  ; CHECK-NEXT:  .symidx escaped +; CHECK-NEXT:  .symidx $iexit_thunk$cdecl$v$v  ; CHECK-NOT:   .symidx diff --git a/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir index 35eafe8..f535e0f 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir @@ -68,13 +68,9 @@  # CHECK:      early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.4)  # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16  # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2080 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg  # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22  #  # CHECK-NEXT: $x8 = ADDXri $sp, 1040, 0 @@ -83,14 +79,10 @@  # CHECK-NEXT: $x8 = ADDXri $sp, 2064, 0  # CHECK-NEXT: STR_PXI $p0, killed $x8, 18 :: (store (<vscale x 1 x s16>) into %stack.1)  # -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1056 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 +# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16  # CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.4)  # CHECK-NEXT: frame-destroy CFI_INSTRUCTION 
def_cfa_offset 0  # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 @@ -100,38 +92,26 @@  # ASM:       str x29, [sp, #-16]!  # ASM-NEXT:  .cfi_def_cfa_offset 16  # ASM-NEXT:  .cfi_offset w29, -16 -# ASM-NEXT:  sub sp, sp, #1024 -# ASM-NEXT:  .cfi_def_cfa_offset 1040 -# ASM-NEXT:  addvl sp, sp, #-1 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG -# ASM-NEXT:  sub sp, sp, #1040 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT:  addvl sp, sp, #-2 +# ASM-NEXT:  sub sp, sp, #2064 +# ASM-NEXT:  .cfi_def_cfa_offset 2080 +# ASM-NEXT:  addvl sp, sp, #-3  # ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG  # -# ASM:	     addvl sp, sp, #2 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT:  add sp, sp, #1024 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG -# ASM-NEXT:  addvl sp, sp, #1 -# ASM-NEXT:  .cfi_def_cfa wsp, 1056 -# ASM-NEXT:  add sp, sp, #1040 -# ASM-NEXT:  .cfi_def_cfa_offset 16 +# ASM:	     add sp, sp, #2064 +# ASM-NEXT:  .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG +# ASM-NEXT:  addvl   sp, sp, #3 +# ASM-NEXT:  .cfi_def_cfa wsp, 16  # ASM-NEXT:  ldr x29, [sp], #16  # ASM-NEXT:  .cfi_def_cfa_offset 0  # ASM-NEXT:  .cfi_restore w29  # UNWINDINFO:      DW_CFA_def_cfa_offset: +16  # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO:      DW_CFA_def_cfa_offset: +1040 -# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus +# UNWINDINFO:      DW_CFA_def_cfa_offset: +2080  # UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus  # -# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +1056, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO:      DW_CFA_def_cfa: reg31 +1056 -# UNWINDINFO:      DW_CFA_def_cfa_offset: +16 +# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus +# UNWINDINFO:      DW_CFA_def_cfa: reg31 +16  # UNWINDINFO:      DW_CFA_def_cfa_offset: +0  # UNWINDINFO-NEXT: DW_CFA_restore: reg29 @@ -270,13 +250,9 @@ body:             |  # CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5)  # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16  # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0 +# 
CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2080 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg  # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22  #  # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 1040, 0 @@ -286,14 +262,10 @@ body:             |  # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 2064, 0  # CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 23  # -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1056 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 +# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16  # CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5)  # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0  # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 @@ -303,38 +275,27 @@ body:             |  # ASM:       str x29, [sp, #-16]!  # ASM-NEXT:  .cfi_def_cfa_offset 16  # ASM-NEXT:  .cfi_offset w29, -16 -# ASM-NEXT:  sub sp, sp, #1024 -# ASM-NEXT:  .cfi_def_cfa_offset 1040 -# ASM-NEXT:  addvl sp, sp, #-1 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG -# ASM-NEXT:  sub sp, sp, #1040 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT:  addvl sp, sp, #-2 +# ASM-NEXT:  sub sp, sp, #2064 +# ASM-NEXT:  .cfi_def_cfa_offset 2080 +# ASM-NEXT:  addvl sp, sp, #-3  # ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG  # -# ASM:       addvl sp, sp, #2 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT:  add sp, sp, #1024 -# ASM-NEXT:  .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG -# ASM-NEXT:  addvl sp, sp, #1 -# ASM-NEXT:  .cfi_def_cfa wsp, 1056 -# ASM-NEXT:  add sp, sp, #1040 -# ASM-NEXT:  .cfi_def_cfa_offset 16 +# ASM:       add sp, sp, #2064 +# ASM-NEXT:  .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG +# ASM-NEXT:  addvl   sp, sp, #3 +# ASM-NEXT:  .cfi_def_cfa wsp, 16  # ASM-NEXT:  ldr x29, [sp], #16  # ASM-NEXT:  .cfi_def_cfa_offset 0  # ASM-NEXT:  .cfi_restore w29 +# ASM-NEXT:  ret  # UNWINDINFO: DW_CFA_def_cfa_offset: +16  # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_offset: +1040 -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_offset: +2080  # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, 
DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus  # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1056, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa: reg31 +1056 -# UNWINDINFO: DW_CFA_def_cfa_offset: +16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16  # UNWINDINFO: DW_CFA_def_cfa_offset: +0  # UNWINDINFO-NEXT: DW_CFA_restore: reg29 @@ -385,10 +346,8 @@ body:             |  # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16  # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8  # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg  #  # CHECK-NEXT: $[[TMP:x[0-9]+]] = SUBXri $fp, 1024, 0  # CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], -2 @@ -396,10 +355,8 @@ body:             |  # CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], -3  # CHECK-NEXT: STR_PXI $p0, $fp, -1  # -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 +# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg  # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16  # CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.6), (load (s64) from %stack.5)  # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 @@ -414,15 +371,11 @@ body:             |  # ASM-NEXT:  .cfi_def_cfa w29, 16  # ASM-NEXT:  .cfi_offset w30, -8  # ASM-NEXT:  .cfi_offset w29, -16 -# ASM-NEXT:  sub sp, sp, #1024 -# ASM-NEXT:  addvl sp, sp, #-1 -# ASM-NEXT:  sub sp, sp, #1040 -# ASM-NEXT:  addvl sp, sp, #-2 +# ASM-NEXT:  sub sp, sp, #2064 +# ASM-NEXT:  addvl sp, sp, #-3  # -# ASM:       addvl sp, sp, #2 -# ASM-NEXT:  add sp, sp, #1024 -# ASM-NEXT:  addvl sp, sp, #1 -# ASM-NEXT:  add sp, sp, #1040 +# ASM:       add sp, sp, #2064 +# ASM-NEXT:  addvl sp, sp, #3  # ASM-NEXT:  .cfi_def_cfa wsp, 16  # ASM-NEXT:  ldp x29, x30, [sp], #16  # ASM-NEXT:  .cfi_def_cfa_offset 0 diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll index 690a39d..c13dd33 100644 --- a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll +++ b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll @@ -19,20 +19,16 @@ define void @zpr_and_ppr_local(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vec  ; CHECK-LABEL: zpr_and_ppr_local:  ; CHECK:       // %bb.0:  ; CHECK-NEXT:    str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT:    sub sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #-1 -; CHECK-NEXT:    sub sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #-1 +; CHECK-NEXT:    sub sp, sp, #2048 +; CHECK-NEXT:    addvl sp, sp, #-2  ; CHECK-NEXT:    .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG  ; CHECK-NEXT:    .cfi_offset w29, -16  ; CHECK-NEXT:    add x8, sp, #2048  ; CHECK-NEXT:    str p0, [x8, #15, mul vl]  ; CHECK-NEXT:    add x8, sp, #1024  ; CHECK-NEXT:    str z0, [x8] -; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1024 +; CHECK-NEXT:    add sp, sp, #2048 +; CHECK-NEXT:    addvl sp, sp, #2  ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload  ; CHECK-NEXT:    ret    %ppr_local = alloca <vscale x 16 x i1> @@ -62,20 +58,16 @@ define void @zpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %  ; CHECK:       // %bb.0:  ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill  ; CHECK-NEXT:    mov x29, sp -; CHECK-NEXT:    sub sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #-1 -; CHECK-NEXT:    sub sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #-1 +; CHECK-NEXT:    sub sp, sp, #2048 +; CHECK-NEXT:    addvl sp, sp, #-2  ; CHECK-NEXT:    .cfi_def_cfa w29, 16  ; CHECK-NEXT:    .cfi_offset w30, -8  ; CHECK-NEXT:    .cfi_offset w29, -16  ; CHECK-NEXT:    sub x8, x29, #1024  ; CHECK-NEXT:    str p0, [x29, #-1, mul vl]  ; CHECK-NEXT:    str z0, [x8, #-2, mul vl] -; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1024 +; CHECK-NEXT:    add sp, sp, #2048 +; CHECK-NEXT:    addvl sp, sp, #2  ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload  ; CHECK-NEXT:    ret    %ppr_local = alloca <vscale x 16 x i1> @@ -103,17 +95,15 @@ define void @fpr_and_ppr_local(<vscale x 16 x i1> %pred, double %double) "aarch6  ; CHECK-LABEL: fpr_and_ppr_local:  ; CHECK:       // %bb.0:  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT:    sub sp, sp, #1024 +; CHECK-NEXT:    sub sp, sp, #2064  ; CHECK-NEXT:    addvl sp, sp, #-1 -; CHECK-NEXT:    sub sp, sp, #1040  ; CHECK-NEXT:    .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG  ; CHECK-NEXT:    .cfi_offset w29, -16  ; CHECK-NEXT:    add x8, sp, #2064  ; CHECK-NEXT:    str p0, [x8, #7, mul vl]  ; CHECK-NEXT:    str d0, [sp, #1032] -; CHECK-NEXT:    add sp, sp, #1024 +; CHECK-NEXT:    add sp, sp, #2064  ; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1040  ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload  ; CHECK-NEXT:    ret    %ppr_local = alloca <vscale x 16 x i1> @@ -144,17 +134,15 @@ define void @fpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, double %double) "aar  ; CHECK:       // %bb.0:  ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill  ; CHECK-NEXT:    mov x29, sp -; CHECK-NEXT:    sub sp, sp, #1024 +; CHECK-NEXT:    sub sp, sp, #2064  ; CHECK-NEXT:    addvl sp, sp, #-1 -; CHECK-NEXT:    sub sp, sp, #1040  ; CHECK-NEXT:    .cfi_def_cfa w29, 16  ; CHECK-NEXT:    .cfi_offset w30, -8  ; CHECK-NEXT:    .cfi_offset w29, -16  ; CHECK-NEXT:    str p0, [x29, #-1, mul vl]  ; CHECK-NEXT:    str d0, [sp, #1032] -; CHECK-NEXT:    add sp, sp, #1024 +; CHECK-NEXT:    add sp, sp, #2064  ; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1040  ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload  ; CHECK-NEXT:    ret    %ppr_local = alloca <vscale x 16 x i1> @@ -793,11 +781,8 @@ define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x  ; CHECK-LABEL: zpr_and_ppr_local_stack_probing:  ; CHECK:       // %bb.0:  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT:    sub sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #-1 -; CHECK-NEXT:    str xzr, [sp] -; CHECK-NEXT:    sub sp, sp, #1824 -; CHECK-NEXT:    addvl sp, sp, #-1 +; CHECK-NEXT:    sub sp, sp, #2848 +; CHECK-NEXT:    addvl sp, sp, #-2  ; CHECK-NEXT:    str xzr, [sp]  ; CHECK-NEXT:    .cfi_escape 0x0f, 0x09, 0x8f, 0xb0, 0x16, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2864 + 16 * VG  ; CHECK-NEXT:    .cfi_offset w29, -16 @@ -806,10 +791,8 @@ define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x  ; CHECK-NEXT:    add x8, sp, #1824  ; CHECK-NEXT:    str z0, [x8]  ; CHECK-NEXT:    str x0, [sp] -; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1024 -; CHECK-NEXT:    addvl sp, sp, #1 -; CHECK-NEXT:    add sp, sp, #1824 +; CHECK-NEXT:    add sp, sp, #2848 +; CHECK-NEXT:    addvl sp, sp, #2  ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload  ; CHECK-NEXT:    ret    "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" "aarch64_pstate_sm_compatible" diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-loads-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-loads-stores.ll index becddae..b2ed8de 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-loads-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-loads-stores.ll @@ -1,19 +1,8 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py  ; RUN: llc -aarch64-sve-vector-bits-min=128  < %s | not grep ptrue  ; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256 -; RUN: llc -aarch64-sve-vector-bits-min=384  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256  ; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 -; RUN: llc -aarch64-sve-vector-bits-min=640  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 -; RUN: llc -aarch64-sve-vector-bits-min=768  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 -; RUN: llc -aarch64-sve-vector-bits-min=896  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512  ; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024 -; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024 -; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024 -; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024 -; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024 -; RUN: llc 
-aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024 -; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024 -; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024  ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048  target triple = "aarch64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index e86f747..37b5422 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -1,11 +1,11 @@  ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -global-isel -stop-after=si-memory-legalizer -mtriple=amdgcn-amd- -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s -; RUN: llc -global-isel -stop-after=si-memory-legalizer -mtriple=amdgcn-amd- -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc -global-isel -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc -global-isel -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10WGP %s -; RUN: llc -global-isel -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10CU %s -; RUN: llc -global-isel -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11WGP %s -; RUN: llc -global-isel -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX11CU %s +; RUN: llc -global-isel -new-reg-bank-select -stop-after=si-memory-legalizer -mtriple=amdgcn-amd- -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s +; RUN: llc -global-isel -new-reg-bank-select -stop-after=si-memory-legalizer -mtriple=amdgcn-amd- -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10WGP %s +; RUN: llc -global-isel -new-reg-bank-select -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10CU %s +; RUN: llc -global-isel -new-reg-bank-select -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11WGP %s +; RUN: llc -global-isel -new-reg-bank-select -stop-after=si-memory-legalizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX11CU %s  ; Note: we use MIR test checks + stop after legalizer to prevent  ; tests from being optimized out. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll index 44b12a9..61a6137 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll @@ -1,5 +1,5 @@  ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -stop-after=finalize-isel < %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 -stop-after=finalize-isel < %s | FileCheck %s  declare void @readsMem(ptr) #0  declare void @writesMem(ptr) #1 diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll new file mode 100644 index 0000000..253a6ec --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/callbr.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s + +define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) { +; CHECK-LABEL: callbr_inline_asm: +; CHECK:       ; %bb.0: +; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT:    flat_load_dword v0, v[0:1] +; CHECK-NEXT:    ;;#ASMSTART +; CHECK-NEXT:    v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2 +; CHECK-NEXT:    ;;#ASMEND +; CHECK-NEXT:  ; %bb.1: ; %fallthrough +; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT:    flat_store_dword v[2:3], v0 +; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT:    s_setpc_b64 s[30:31] +; CHECK-NEXT:  .LBB0_2: ; Inline asm indirect target +; CHECK-NEXT:    ; %indirect +; CHECK-NEXT:    ; Label of block must be emitted +; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT:    flat_store_dword v[4:5], v0 +; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT:    s_setpc_b64 s[30:31] +	%a = load i32, ptr %src, align 4 +	callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect] +fallthrough: +	store i32 %a, ptr %dst1, align 4 +	br label %ret +indirect: +	store i32 %a, ptr %dst2, align 4 +	br label %ret +ret: +	ret void +} + +define void @callbr_self_loop(i1 %c) { +; CHECK-LABEL: callbr_self_loop: +; CHECK:       ; %bb.0: +; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT:  .LBB1_1: ; %callbr +; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:    ;;#ASMSTART +; CHECK-NEXT:    ;;#ASMEND +; CHECK-NEXT:    s_branch .LBB1_1 +; CHECK-NEXT:  .LBB1_2: ; Inline asm indirect target +; CHECK-NEXT:    ; %callbr.target.ret +; CHECK-NEXT:    ; Label of block must be emitted +; CHECK-NEXT:    s_setpc_b64 s[30:31] +  br label %callbr +callbr: +  callbr void asm "", "!i"() to label %callbr [label %ret] +ret: +  ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll index 007e3f0..076a99f 100644 --- a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll +++ b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll @@ -3,6 +3,7 @@  declare void @foo(ptr)  declare i1 @bar(ptr) +declare i32 @bar32(ptr)  define void @musttail_call_without_return_value(ptr %p) {  ; CHECK-LABEL: define void @musttail_call_without_return_value( @@ -28,6 +29,31 @@ bb.1:    ret void  } +define void @musttail_call_without_return_value_callbr(ptr %p) { +; CHECK-LABEL: define void 
@musttail_call_without_return_value_callbr( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT:  [[ENTRY:.*:]] +; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P]], align 1 +; CHECK-NEXT:    callbr void asm "", "r,!i"(i32 [[LOAD]]) +; CHECK-NEXT:            to label %[[BB_0:.*]] [label %bb.1] +; CHECK:       [[BB_0]]: +; CHECK-NEXT:    musttail call void @foo(ptr [[P]]) +; CHECK-NEXT:    ret void +; CHECK:       [[BB_1:.*:]] +; CHECK-NEXT:    ret void +; +entry: +  %load = load i32, ptr %p, align 1 +  callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1] + +bb.0: +  musttail call void @foo(ptr %p) +  ret void + +bb.1: +  ret void +} +  define i1 @musttail_call_with_return_value(ptr %p) {  ; CHECK-LABEL: define i1 @musttail_call_with_return_value(  ; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { @@ -51,3 +77,28 @@ bb.0:  bb.1:    ret i1 %load  } + +define i32 @musttail_call_with_return_value_callbr(ptr %p) { +; CHECK-LABEL: define i32 @musttail_call_with_return_value_callbr( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT:  [[ENTRY:.*:]] +; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P]], align 1 +; CHECK-NEXT:    callbr void asm "", "r,!i"(i32 [[LOAD]]) +; CHECK-NEXT:            to label %[[BB_0:.*]] [label %bb.1] +; CHECK:       [[BB_0]]: +; CHECK-NEXT:    [[RET:%.*]] = musttail call i32 @bar32(ptr [[P]]) +; CHECK-NEXT:    ret i32 [[RET]] +; CHECK:       [[BB_1:.*:]] +; CHECK-NEXT:    ret i32 [[LOAD]] +; +entry: +  %load = load i32, ptr %p, align 1 +  callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1] + +bb.0: +  %ret = musttail call i32 @bar32(ptr %p) +  ret i32 %ret + +bb.1: +  ret i32 %load +} diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll index 3e2e43f..df63592 100644 --- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll @@ -36,26 +36,60 @@ loop:    br label %loop  } +define amdgpu_kernel void @infinite_loop_callbr(ptr addrspace(1) %out) { +; SI-LABEL: infinite_loop_callbr: +; SI:       ; %bb.0: ; %entry +; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT:    ;;#ASMSTART +; SI-NEXT:    ;;#ASMEND +; SI-NEXT:    s_mov_b32 s3, 0xf000 +; SI-NEXT:    s_mov_b32 s2, -1 +; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT:    s_waitcnt lgkmcnt(0) +; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT:    s_waitcnt vmcnt(0) +; SI-NEXT:    s_endpgm +; IR-LABEL: @infinite_loop_callbr( +; IR-NEXT:  entry: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[LOOP:%.*]] [] +; IR:       loop: +; IR-NEXT:    store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 +; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]] +; IR:       TransitionBlock: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[LOOP]] [] +; IR:       DummyReturnBlock: +; IR-NEXT:    ret void +; +entry: +  callbr void asm "", ""() to label %loop [] + +loop: +  store volatile i32 999, ptr addrspace(1) %out, align 4 +  callbr void asm "", ""() to label %loop [] +} +  define amdgpu_kernel void @infinite_loop_ret(ptr addrspace(1) %out) {  ; SI-LABEL: infinite_loop_ret:  ; SI:       ; %bb.0: ; %entry  ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0  ; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc -; SI-NEXT:    s_cbranch_execz .LBB1_3 +; SI-NEXT:    s_cbranch_execz .LBB2_3  ; SI-NEXT:  ; %bb.1: ; %loop.preheader  ; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9  ; SI-NEXT:    s_mov_b32 s3, 0xf000  ; SI-NEXT:    
s_mov_b32 s2, -1  ; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7  ; SI-NEXT:    s_and_b64 vcc, exec, -1 -; SI-NEXT:  .LBB1_2: ; %loop +; SI-NEXT:  .LBB2_2: ; %loop  ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1  ; SI-NEXT:    s_waitcnt lgkmcnt(0)  ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0  ; SI-NEXT:    s_waitcnt vmcnt(0)  ; SI-NEXT:    s_mov_b64 vcc, vcc -; SI-NEXT:    s_cbranch_vccnz .LBB1_2 -; SI-NEXT:  .LBB1_3: ; %UnifiedReturnBlock +; SI-NEXT:    s_cbranch_vccnz .LBB2_2 +; SI-NEXT:  .LBB2_3: ; %UnifiedReturnBlock  ; SI-NEXT:    s_endpgm  ; IR-LABEL: @infinite_loop_ret(  ; IR-NEXT:  entry: @@ -81,44 +115,93 @@ return:    ret void  } +define amdgpu_kernel void @infinite_loop_ret_callbr(ptr addrspace(1) %out) { +; SI-LABEL: infinite_loop_ret_callbr: +; SI:       ; %bb.0: ; %entry +; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0 +; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT:    ;;#ASMSTART +; SI-NEXT:    ;;#ASMEND +; SI-NEXT:  ; %bb.1: ; %loop.preheader +; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT:    s_mov_b32 s3, 0xf000 +; SI-NEXT:    s_mov_b32 s2, -1 +; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT:    s_waitcnt lgkmcnt(0) +; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT:    s_waitcnt vmcnt(0) +; SI-NEXT:  .LBB3_2: ; Inline asm indirect target +; SI-NEXT:    ; %UnifiedReturnBlock +; SI-NEXT:    ; Label of block must be emitted +; SI-NEXT:    s_endpgm +; IR-LABEL: @infinite_loop_ret_callbr( +; IR-NEXT:  entry: +; IR-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; IR-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP]], 1 +; IR-NEXT:    [[COND32:%.*]] = zext i1 [[COND]] to i32 +; IR-NEXT:    callbr void asm "", "r,!i"(i32 [[COND32]]) +; IR-NEXT:            to label [[LOOP:%.*]] [label %UnifiedReturnBlock] +; IR:       loop: +; IR-NEXT:    store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 +; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]] +; IR:       TransitionBlock: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[LOOP]] [] +; IR:       UnifiedReturnBlock: +; IR-NEXT:    ret void +; +entry: +  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() +  %cond = icmp eq i32 %tmp, 1 +  %cond32 = zext i1 %cond to i32 +  callbr void asm "", "r,!i"(i32 %cond32) to label %loop [label %return] + +loop: +  store volatile i32 999, ptr addrspace(1) %out, align 4 +  callbr void asm "", ""() to label %loop [] + +return: +  ret void +} +  define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) {  ; SI-LABEL: infinite_loops:  ; SI:       ; %bb.0: ; %entry  ; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9  ; SI-NEXT:    s_mov_b64 s[2:3], -1 -; SI-NEXT:    s_cbranch_scc1 .LBB2_4 +; SI-NEXT:    s_cbranch_scc1 .LBB4_4  ; SI-NEXT:  ; %bb.1:  ; SI-NEXT:    s_mov_b32 s3, 0xf000  ; SI-NEXT:    s_mov_b32 s2, -1  ; SI-NEXT:    v_mov_b32_e32 v0, 0x378  ; SI-NEXT:    s_and_b64 vcc, exec, -1 -; SI-NEXT:  .LBB2_2: ; %loop2 +; SI-NEXT:  .LBB4_2: ; %loop2  ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1  ; SI-NEXT:    s_waitcnt lgkmcnt(0)  ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0  ; SI-NEXT:    s_waitcnt vmcnt(0)  ; SI-NEXT:    s_mov_b64 vcc, vcc -; SI-NEXT:    s_cbranch_vccnz .LBB2_2 +; SI-NEXT:    s_cbranch_vccnz .LBB4_2  ; SI-NEXT:  ; %bb.3: ; %Flow  ; SI-NEXT:    s_mov_b64 s[2:3], 0 -; SI-NEXT:  .LBB2_4: ; %Flow2 +; SI-NEXT:  .LBB4_4: ; %Flow2  ; SI-NEXT:    s_and_b64 vcc, exec, s[2:3]  ; SI-NEXT:    s_waitcnt lgkmcnt(0)  ; SI-NEXT:    s_mov_b64 vcc, vcc -; SI-NEXT:    
s_cbranch_vccz .LBB2_7 +; SI-NEXT:    s_cbranch_vccz .LBB4_7  ; SI-NEXT:  ; %bb.5:  ; SI-NEXT:    s_mov_b32 s3, 0xf000  ; SI-NEXT:    s_mov_b32 s2, -1  ; SI-NEXT:    s_waitcnt expcnt(0)  ; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7  ; SI-NEXT:    s_and_b64 vcc, exec, 0 -; SI-NEXT:  .LBB2_6: ; %loop1 +; SI-NEXT:  .LBB4_6: ; %loop1  ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1  ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0  ; SI-NEXT:    s_waitcnt vmcnt(0)  ; SI-NEXT:    s_mov_b64 vcc, vcc -; SI-NEXT:    s_cbranch_vccz .LBB2_6 -; SI-NEXT:  .LBB2_7: ; %DummyReturnBlock +; SI-NEXT:    s_cbranch_vccz .LBB4_6 +; SI-NEXT:  .LBB4_7: ; %DummyReturnBlock  ; SI-NEXT:    s_endpgm  ; IR-LABEL: @infinite_loops(  ; IR-NEXT:  entry: @@ -144,24 +227,78 @@ loop2:    br label %loop2  } +define amdgpu_kernel void @infinite_loops_callbr(ptr addrspace(1) %out) { +; SI-LABEL: infinite_loops_callbr: +; SI:       ; %bb.0: ; %entry +; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT:    s_waitcnt lgkmcnt(0) +; SI-NEXT:    ;;#ASMSTART +; SI-NEXT:    ;;#ASMEND +; SI-NEXT:  ; %bb.1: ; %loop1 +; SI-NEXT:    s_mov_b32 s3, 0xf000 +; SI-NEXT:    s_mov_b32 s2, -1 +; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT:    s_waitcnt vmcnt(0) +; SI-NEXT:    s_endpgm +; SI-NEXT:  .LBB5_2: ; Inline asm indirect target +; SI-NEXT:    ; %loop2.preheader +; SI-NEXT:    ; Label of block must be emitted +; SI-NEXT:    s_mov_b32 s3, 0xf000 +; SI-NEXT:    s_mov_b32 s2, -1 +; SI-NEXT:    v_mov_b32_e32 v0, 0x378 +; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT:    s_waitcnt vmcnt(0) +; SI-NEXT:    s_endpgm +; IR-LABEL: @infinite_loops_callbr( +; IR-NEXT:  entry: +; IR-NEXT:    callbr void asm "", "r,!i"(i32 poison) +; IR-NEXT:            to label [[LOOP1:%.*]] [label %loop2] +; IR:       loop1: +; IR-NEXT:    store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 +; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]] +; IR:       TransitionBlock: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[LOOP1]] [] +; IR:       loop2: +; IR-NEXT:    store volatile i32 888, ptr addrspace(1) [[OUT]], align 4 +; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK1:%.*]], label [[DUMMYRETURNBLOCK]] +; IR:       TransitionBlock1: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[LOOP2:%.*]] [] +; IR:       DummyReturnBlock: +; IR-NEXT:    ret void +; +entry: +  callbr void asm "", "r,!i"(i32 poison) to label %loop1 [label %loop2] + +loop1: +  store volatile i32 999, ptr addrspace(1) %out, align 4 +  callbr void asm "", ""() to label %loop1 [] + +loop2: +  store volatile i32 888, ptr addrspace(1) %out, align 4 +  callbr void asm "", ""() to label %loop2 [] +} +  define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {  ; SI-LABEL: infinite_loop_nest_ret:  ; SI:       ; %bb.0: ; %entry  ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0  ; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc -; SI-NEXT:    s_cbranch_execz .LBB3_5 +; SI-NEXT:    s_cbranch_execz .LBB6_5  ; SI-NEXT:  ; %bb.1: ; %outer_loop.preheader  ; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9  ; SI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 3, v0  ; SI-NEXT:    s_mov_b32 s7, 0xf000  ; SI-NEXT:    s_mov_b32 s6, -1  ; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7 -; SI-NEXT:  .LBB3_2: ; %outer_loop +; SI-NEXT:  .LBB6_2: ; %outer_loop  ; SI-NEXT:    ; =>This Loop Header: Depth=1 -; SI-NEXT:    ; Child Loop BB3_3 Depth 2 +; SI-NEXT:    ; Child Loop 
BB6_3 Depth 2  ; SI-NEXT:    s_mov_b64 s[2:3], 0 -; SI-NEXT:  .LBB3_3: ; %inner_loop -; SI-NEXT:    ; Parent Loop BB3_2 Depth=1 +; SI-NEXT:  .LBB6_3: ; %inner_loop +; SI-NEXT:    ; Parent Loop BB6_2 Depth=1  ; SI-NEXT:    ; => This Inner Loop Header: Depth=2  ; SI-NEXT:    s_and_b64 s[8:9], exec, s[0:1]  ; SI-NEXT:    s_or_b64 s[2:3], s[8:9], s[2:3] @@ -169,13 +306,13 @@ define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {  ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0  ; SI-NEXT:    s_waitcnt vmcnt(0)  ; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3] -; SI-NEXT:    s_cbranch_execnz .LBB3_3 +; SI-NEXT:    s_cbranch_execnz .LBB6_3  ; SI-NEXT:  ; %bb.4: ; %loop.exit.guard -; SI-NEXT:    ; in Loop: Header=BB3_2 Depth=1 +; SI-NEXT:    ; in Loop: Header=BB6_2 Depth=1  ; SI-NEXT:    s_or_b64 exec, exec, s[2:3]  ; SI-NEXT:    s_mov_b64 vcc, 0 -; SI-NEXT:    s_branch .LBB3_2 -; SI-NEXT:  .LBB3_5: ; %UnifiedReturnBlock +; SI-NEXT:    s_branch .LBB6_2 +; SI-NEXT:  .LBB6_5: ; %UnifiedReturnBlock  ; SI-NEXT:    s_endpgm  ; IR-LABEL: @infinite_loop_nest_ret(  ; IR-NEXT:  entry: @@ -212,4 +349,82 @@ return:    ret void  } +define amdgpu_kernel void @infinite_loop_nest_ret_callbr(ptr addrspace(1) %out) { +; SI-LABEL: infinite_loop_nest_ret_callbr: +; SI:       ; %bb.0: ; %entry +; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0 +; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT:    ;;#ASMSTART +; SI-NEXT:    ;;#ASMEND +; SI-NEXT:  ; %bb.1: ; %outer_loop.preheader +; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-NEXT:    s_mov_b32 s7, 0xf000 +; SI-NEXT:    s_mov_b32 s6, -1 +; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT:    s_and_b64 s[0:1], exec, 0 +; SI-NEXT:    s_branch .LBB7_3 +; SI-NEXT:  .LBB7_2: ; %loop.exit.guard +; SI-NEXT:    ; in Loop: Header=BB7_3 Depth=1 +; SI-NEXT:    s_and_b64 vcc, exec, s[2:3] +; SI-NEXT:    s_cbranch_vccnz .LBB7_5 +; SI-NEXT:  .LBB7_3: ; %outer_loop +; SI-NEXT:    ; =>This Inner Loop Header: Depth=1 +; SI-NEXT:    ;;#ASMSTART +; SI-NEXT:    ;;#ASMEND +; SI-NEXT:    s_waitcnt lgkmcnt(0) +; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT:    s_waitcnt vmcnt(0) +; SI-NEXT:    s_mov_b64 s[2:3], -1 +; SI-NEXT:    s_mov_b64 vcc, s[0:1] +; SI-NEXT:    s_cbranch_vccz .LBB7_2 +; SI-NEXT:  ; %bb.4: ; %TransitionBlock.target.outer_loop +; SI-NEXT:    ; in Loop: Header=BB7_3 Depth=1 +; SI-NEXT:    s_mov_b64 s[2:3], 0 +; SI-NEXT:    s_branch .LBB7_2 +; SI-NEXT:  .LBB7_5: ; Inline asm indirect target +; SI-NEXT:    ; %UnifiedReturnBlock +; SI-NEXT:    ; Label of block must be emitted +; SI-NEXT:    s_endpgm +; IR-LABEL: @infinite_loop_nest_ret_callbr( +; IR-NEXT:  entry: +; IR-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; IR-NEXT:    [[COND1:%.*]] = icmp ne i32 [[TMP]], 1 +; IR-NEXT:    [[COND1_32:%.*]] = zext i1 [[COND1]] to i32 +; IR-NEXT:    callbr void asm "", "r,!i"(i32 [[COND1_32]]) +; IR-NEXT:            to label [[OUTER_LOOP:%.*]] [label %UnifiedReturnBlock] +; IR:       outer_loop: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[INNER_LOOP:%.*]] [] +; IR:       inner_loop: +; IR-NEXT:    store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 +; IR-NEXT:    [[COND3:%.*]] = icmp eq i32 [[TMP]], 3 +; IR-NEXT:    [[COND3_32:%.*]] = zext i1 [[COND3]] to i32 +; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]] +; IR:       TransitionBlock: +; IR-NEXT:    callbr void asm "", "r,!i"(i32 [[COND3_32]]) +; IR-NEXT:            to label [[INNER_LOOP]] 
[label %outer_loop] +; IR:       UnifiedReturnBlock: +; IR-NEXT:    ret void +; +entry: +  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() +  %cond1 = icmp ne i32 %tmp, 1  ; avoid following BB optimizing away through the domination +  %cond1_32 = zext i1 %cond1 to i32 +  callbr void asm "", "r,!i"(i32 %cond1_32) to label %outer_loop [label %return] + +outer_loop: +  ; %cond2 = icmp eq i32 %tmp, 2 +  ; br i1 %cond2, label %outer_loop, label %inner_loop +  callbr void asm "", ""() to label %inner_loop [] + +inner_loop:                                     ; preds = %LeafBlock, %LeafBlock1 +  store volatile i32 999, ptr addrspace(1) %out, align 4 +  %cond3 = icmp eq i32 %tmp, 3 +  %cond3_32 = zext i1 %cond3 to i32 +  callbr void asm "", "r,!i"(i32 %cond3_32) to label %inner_loop [label %outer_loop] + +return: +  ret void +} +  declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll index 34de1e4..01bcdad 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll @@ -3,15 +3,16 @@  ; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=ISA  define void @nested_inf_loop(i1 %0, i1 %1) { -; OPT-LABEL: @nested_inf_loop( -; OPT-NEXT:  BB: -; OPT-NEXT:    br label [[BB1:%.*]] -; OPT:       BB1: -; OPT-NEXT:    [[BRMERGE:%.*]] = select i1 [[TMP0:%.*]], i1 true, i1 [[TMP1:%.*]] -; OPT-NEXT:    br i1 [[BRMERGE]], label [[BB1]], label [[INFLOOP:%.*]] -; OPT:       infloop: -; OPT-NEXT:    br i1 true, label [[INFLOOP]], label [[DUMMYRETURNBLOCK:%.*]] -; OPT:       DummyReturnBlock: +; OPT-LABEL: define void @nested_inf_loop( +; OPT-SAME: i1 [[TMP0:%.*]], i1 [[TMP1:%.*]]) { +; OPT-NEXT:  [[BB:.*:]] +; OPT-NEXT:    br label %[[BB1:.*]] +; OPT:       [[BB1]]: +; OPT-NEXT:    [[BRMERGE:%.*]] = select i1 [[TMP0]], i1 true, i1 [[TMP1]] +; OPT-NEXT:    br i1 [[BRMERGE]], label %[[BB1]], label %[[INFLOOP:.*]] +; OPT:       [[INFLOOP]]: +; OPT-NEXT:    br i1 true, label %[[INFLOOP]], label %[[DUMMYRETURNBLOCK:.*]] +; OPT:       [[DUMMYRETURNBLOCK]]:  ; OPT-NEXT:    ret void  ;  ; ISA-LABEL: nested_inf_loop: @@ -63,3 +64,84 @@ BB4:  BB3:    br label %BB1  } + +define void @nested_inf_loop_callbr(i32 %0, i32 %1) { +; OPT-LABEL: define void @nested_inf_loop_callbr( +; OPT-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) { +; OPT-NEXT:  [[BB:.*:]] +; OPT-NEXT:    callbr void asm "", ""() +; OPT-NEXT:            to label %[[BB1:.*]] [] +; OPT:       [[BB1]]: +; OPT-NEXT:    callbr void asm "", "r,!i"(i32 [[TMP0]]) +; OPT-NEXT:            to label %[[BB3:.*]] [label %BB2] +; OPT:       [[BB2:.*:]] +; OPT-NEXT:    callbr void asm "", ""() +; OPT-NEXT:            to label %[[BB4:.*]] [] +; OPT:       [[BB4]]: +; OPT-NEXT:    br i1 true, label %[[TRANSITIONBLOCK:.*]], label %[[DUMMYRETURNBLOCK:.*]] +; OPT:       [[TRANSITIONBLOCK]]: +; OPT-NEXT:    callbr void asm "", "r,!i"(i32 [[TMP1]]) +; OPT-NEXT:            to label %[[BB3]] [label %BB4] +; OPT:       [[BB3]]: +; OPT-NEXT:    callbr void asm "", ""() +; OPT-NEXT:            to label %[[BB1]] [] +; OPT:       [[DUMMYRETURNBLOCK]]: +; OPT-NEXT:    ret void +; +; ISA-LABEL: nested_inf_loop_callbr: +; ISA:       ; %bb.0: ; %BB +; ISA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ISA-NEXT:    ;;#ASMSTART +; ISA-NEXT:    ;;#ASMEND +; ISA-NEXT:    ; implicit-def: $sgpr6_sgpr7 +; ISA-NEXT:    ; implicit-def: $sgpr4_sgpr5 +; ISA-NEXT:  .LBB1_1: ; %BB1 +; 
ISA-NEXT:    ; =>This Inner Loop Header: Depth=1 +; ISA-NEXT:    ;;#ASMSTART +; ISA-NEXT:    ;;#ASMEND +; ISA-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec +; ISA-NEXT:    s_and_b64 s[8:9], s[4:5], exec +; ISA-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9] +; ISA-NEXT:  .LBB1_2: ; %BB3 +; ISA-NEXT:    ; in Loop: Header=BB1_1 Depth=1 +; ISA-NEXT:    ;;#ASMSTART +; ISA-NEXT:    ;;#ASMEND +; ISA-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec +; ISA-NEXT:    s_and_b64 s[8:9], s[6:7], exec +; ISA-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9] +; ISA-NEXT:    s_branch .LBB1_1 +; ISA-NEXT:  .LBB1_3: ; Inline asm indirect target +; ISA-NEXT:    ; %BB2 +; ISA-NEXT:    ; in Loop: Header=BB1_1 Depth=1 +; ISA-NEXT:    ; Label of block must be emitted +; ISA-NEXT:    ;;#ASMSTART +; ISA-NEXT:    ;;#ASMEND +; ISA-NEXT:    s_mov_b64 s[6:7], -1 +; ISA-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5] +; ISA-NEXT:    s_cbranch_execz .LBB1_5 +; ISA-NEXT:  ; %bb.4: ; %TransitionBlock.target.BB3 +; ISA-NEXT:    ; in Loop: Header=BB1_1 Depth=1 +; ISA-NEXT:    s_xor_b64 s[6:7], exec, -1 +; ISA-NEXT:  .LBB1_5: ; %loop.exit.guard +; ISA-NEXT:    ; in Loop: Header=BB1_1 Depth=1 +; ISA-NEXT:    s_or_b64 exec, exec, s[8:9] +; ISA-NEXT:    s_and_b64 vcc, exec, s[6:7] +; ISA-NEXT:    s_mov_b64 s[6:7], 0 +; ISA-NEXT:    s_cbranch_vccz .LBB1_2 +; ISA-NEXT:  ; %bb.6: ; %DummyReturnBlock +; ISA-NEXT:    s_setpc_b64 s[30:31] +BB: +  callbr void asm "", ""() to label %BB1 [] + +BB1: +  callbr void asm "", "r,!i"(i32 %0) to label %BB3 [label %BB2] + +BB2: +  callbr void asm "", ""() to label %BB4 [] + +BB4: +  callbr void asm "", "r,!i"(i32 %1) to label %BB3 [label %BB4] + +BB3: +  callbr void asm "", ""() to label %BB1 [] +} diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll index 4cbe682..004c279 100644 --- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll +++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY  ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s  declare void @llvm.trap() @@ -70,8 +70,33 @@ define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {  ; CHECK-NEXT:    s_mov_b64 s[2:3], -1  ; CHECK-NEXT:    s_trap 2  ; CHECK-NEXT:    s_branch .LBB0_4 - - +; UNIFY-LABEL: @kernel( +; UNIFY-NEXT:  entry: +; UNIFY-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; UNIFY-NEXT:    [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 +; UNIFY-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; UNIFY:       if.then: +; UNIFY-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; UNIFY-NEXT:    br i1 [[CMP1]], label [[IF_END6_SINK_SPLIT:%.*]], label [[COND_FALSE:%.*]] +; UNIFY:       cond.false: +; UNIFY-NEXT:    call void @llvm.trap() +; UNIFY-NEXT:    unreachable +; UNIFY:       if.else: +; UNIFY-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 +; UNIFY-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END6:%.*]] +; UNIFY:       if.then3: +; UNIFY-NEXT:    [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 +; UNIFY-NEXT:    
br i1 [[CMP1_I7]], label [[IF_END6_SINK_SPLIT]], label [[COND_FALSE_I8:%.*]] +; UNIFY:       cond.false.i8: +; UNIFY-NEXT:    call void @llvm.trap() +; UNIFY-NEXT:    unreachable +; UNIFY:       if.end6.sink.split: +; UNIFY-NEXT:    [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] +; UNIFY-NEXT:    store i32 [[A]], ptr addrspace(1) [[X1]], align 4 +; UNIFY-NEXT:    br label [[IF_END6]] +; UNIFY:       if.end6: +; UNIFY-NEXT:    ret void +;  entry:    %tid = call i32 @llvm.amdgcn.workitem.id.x()    %cmp = icmp eq i32 %n, 256 @@ -105,5 +130,129 @@ if.end6.sink.split:  if.end6:    ret void  } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; UNIFY: {{.*}} + +define amdgpu_kernel void @kernel_callbr(i32 %a, ptr addrspace(1) %x, i32 noundef %n) { +; CHECK-LABEL: kernel_callbr: +; CHECK:       ; %bb.0: ; %entry +; CHECK-NEXT:    s_load_dword s1, s[8:9], 0x10 +; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0 +; CHECK-NEXT:    s_waitcnt lgkmcnt(0) +; CHECK-NEXT:    s_cmpk_eq_i32 s1, 0x100 +; CHECK-NEXT:    s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT:    ;;#ASMSTART +; CHECK-NEXT:    ;;#ASMEND +; CHECK-NEXT:  ; %bb.1: ; %if.then +; CHECK-NEXT:    s_cmp_eq_u32 s0, 0 +; CHECK-NEXT:    s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT:    ;;#ASMSTART +; CHECK-NEXT:    ;;#ASMEND +; CHECK-NEXT:  .LBB1_2: ; %if.end6.sink.split +; CHECK-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x8 +; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0 +; CHECK-NEXT:    v_mov_b32_e32 v1, s0 +; CHECK-NEXT:    s_waitcnt lgkmcnt(0) +; CHECK-NEXT:    global_store_dword v0, v1, s[2:3] +; CHECK-NEXT:    ;;#ASMSTART +; CHECK-NEXT:    ;;#ASMEND +; CHECK-NEXT:  .LBB1_3: ; Inline asm indirect target +; CHECK-NEXT:    ; %UnifiedReturnBlock +; CHECK-NEXT:    ; Label of block must be emitted +; CHECK-NEXT:    s_endpgm +; CHECK-NEXT:  .LBB1_4: ; Inline asm indirect target +; CHECK-NEXT:    ; %if.else +; CHECK-NEXT:    ; Label of block must be emitted +; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 10, v0 +; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT:    ;;#ASMSTART +; CHECK-NEXT:    ;;#ASMEND +; CHECK-NEXT:  ; %bb.5: ; %if.then3 +; CHECK-NEXT:    s_cmp_eq_u32 s0, 0 +; CHECK-NEXT:    s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT:    ;;#ASMSTART +; CHECK-NEXT:    ;;#ASMEND +; CHECK-NEXT:    s_branch .LBB1_2 +; CHECK-NEXT:  .LBB1_6: ; Inline asm indirect target +; CHECK-NEXT:    ; %cond.false.i8 +; CHECK-NEXT:    ; Label of block must be emitted +; CHECK-NEXT:  .LBB1_7: ; Inline asm indirect target +; CHECK-NEXT:    ; %cond.false +; CHECK-NEXT:    ; Label of block must be emitted +; CHECK-NEXT:    s_trap 2 +; CHECK-NEXT:    ; divergent unreachable +; CHECK-NEXT:    s_branch .LBB1_3 +; UNIFY-LABEL: @kernel_callbr( +; UNIFY-NEXT:  entry: +; UNIFY-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; UNIFY-NEXT:    [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 +; UNIFY-NEXT:    [[CMP32:%.*]] = zext i1 [[CMP]] to i32 +; UNIFY-NEXT:    callbr void asm "", "r,!i"(i32 [[CMP32]]) +; UNIFY-NEXT:            to label [[IF_THEN:%.*]] [label %if.else] +; UNIFY:       if.then: +; UNIFY-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; UNIFY-NEXT:    [[CMP1_32:%.*]] = zext i1 [[CMP1]] to i32 +; UNIFY-NEXT:    callbr void asm "", "r,!i"(i32 [[CMP1_32]]) +; UNIFY-NEXT:            to label [[IF_END6_SINK_SPLIT:%.*]] [label 
%cond.false] +; UNIFY:       cond.false: +; UNIFY-NEXT:    call void @llvm.trap() +; UNIFY-NEXT:    unreachable +; UNIFY:       if.else: +; UNIFY-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 +; UNIFY-NEXT:    [[CMP2_32:%.*]] = zext i1 [[CMP2]] to i32 +; UNIFY-NEXT:    callbr void asm "", "r,!i"(i32 [[CMP2_32]]) +; UNIFY-NEXT:            to label [[IF_THEN3:%.*]] [label %if.end6] +; UNIFY:       if.then3: +; UNIFY-NEXT:    [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 +; UNIFY-NEXT:    [[CMP1_I7_32:%.*]] = zext i1 [[CMP1_I7]] to i32 +; UNIFY-NEXT:    callbr void asm "", "r,!i"(i32 [[CMP1_I7_32]]) +; UNIFY-NEXT:            to label [[IF_END6_SINK_SPLIT]] [label %cond.false.i8] +; UNIFY:       cond.false.i8: +; UNIFY-NEXT:    call void @llvm.trap() +; UNIFY-NEXT:    unreachable +; UNIFY:       if.end6.sink.split: +; UNIFY-NEXT:    [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] +; UNIFY-NEXT:    store i32 [[A]], ptr addrspace(1) [[X1]], align 4 +; UNIFY-NEXT:    callbr void asm "", ""() +; UNIFY-NEXT:            to label [[IF_END6:%.*]] [] +; UNIFY:       if.end6: +; UNIFY-NEXT:    ret void +; +entry: +  %tid = call i32 @llvm.amdgcn.workitem.id.x() +  %cmp = icmp eq i32 %n, 256 +  %cmp32 = zext i1 %cmp to i32 +  callbr void asm "", "r,!i"(i32 %cmp32) to label %if.then [label %if.else] + +if.then: +  %cmp1 = icmp eq i32 %a, 0 +  %cmp1_32 = zext i1 %cmp1 to i32 +  callbr void asm "", "r,!i"(i32 %cmp1_32) to label %if.end6.sink.split [label %cond.false] + +cond.false: +  call void @llvm.trap() +  unreachable + +if.else: +  %cmp2 = icmp ult i32 %tid, 10 +  %cmp2_32 = zext i1 %cmp2 to i32 +  callbr void asm "", "r,!i"(i32 %cmp2_32) to label %if.then3 [label %if.end6] + +if.then3: +  %cmp1.i7 = icmp eq i32 %a, 0 +  %cmp1.i7_32 = zext i1 %cmp1.i7 to i32 +  callbr void asm "", "r,!i"(i32 %cmp1.i7_32) to label %if.end6.sink.split [label %cond.false.i8] + +cond.false.i8: +  call void @llvm.trap() +  unreachable + +if.end6.sink.split: +  %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid +  store i32 %a, ptr addrspace(1) %x1, align 4 +  callbr void asm "", ""() to label %if.end6 [] + +if.end6: +  ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/update-phi.ll b/llvm/test/CodeGen/AMDGPU/update-phi.ll index 50666be..684dc1a 100644 --- a/llvm/test/CodeGen/AMDGPU/update-phi.ll +++ b/llvm/test/CodeGen/AMDGPU/update-phi.ll @@ -37,3 +37,42 @@ n28:                                               ; preds = %.loopexit, %n28  n31:                                               ; preds =    ret void  } + +define amdgpu_ps void @_amdgpu_ps_main_callbr() local_unnamed_addr #3 { +; IR-LABEL: @_amdgpu_ps_main_callbr( +; IR-NEXT:  .entry: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[DOTLOOPEXIT:%.*]] [] +; IR:       .loopexit: +; IR-NEXT:    callbr void asm "", ""() +; IR-NEXT:            to label [[N28:%.*]] [] +; IR:       n28: +; IR-NEXT:    [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ] +; IR-NEXT:    [[N29]] = fadd float [[DOT01]], 1.000000e+00 +; IR-NEXT:    [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00 +; IR-NEXT:    [[N30_32:%.*]] = zext i1 [[N30]] to i32 +; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]] +; IR:       TransitionBlock: +; IR-NEXT:    callbr void asm "", "r,!i"(i32 [[N30_32]]) +; IR-NEXT:            to label [[DOTLOOPEXIT]] [label %n28] +; IR:       n31: +; IR-NEXT:    ret void +; IR:       DummyReturnBlock: +; IR-NEXT:    ret void +; 
+.entry: +  callbr void asm "", ""() to label %.loopexit [] + +.loopexit:                                        ; preds = %n28, %.entry +  callbr void asm "", ""() to label %n28 [] + +n28:                                               ; preds = %.loopexit, %n28 +  %.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ] +  %n29 = fadd float %.01, 1.0 +  %n30 = fcmp ogt float %n29, 4.000000e+00 +  %n30.32 = zext i1 %n30 to i32 +  callbr void asm "", "r,!i"(i32 %n30.32) to label %.loopexit [label %n28] + +n31:                                               ; preds = +  ret void +} diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll index 9628405..1448fac 100644 --- a/llvm/test/CodeGen/ARM/llvm.sincos.ll +++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll @@ -1,223 +1,1004 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefix=GNU %s +; RUN: llc -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 < %s | FileCheck -check-prefix=GNUEABI %s +; RUN: llc -mtriple=armv7-apple-ios6 -mcpu=cortex-a8 < %s | FileCheck -check-prefixes=IOS,IOS-NO-STRET %s +; RUN: llc -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 < %s | FileCheck -check-prefixes=IOS,IOS-WITH-STRET %s +; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 < %s | FileCheck -check-prefix=WATCHABI %s  define { half, half } @test_sincos_f16(half %a) { -; CHECK-LABEL: test_sincos_f16: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r4, lr} -; CHECK-NEXT:    sub sp, #8 -; CHECK-NEXT:    bl __gnu_h2f_ieee -; CHECK-NEXT:    add r1, sp, #4 -; CHECK-NEXT:    mov r2, sp -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    ldr r0, [sp, #4] -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    mov r4, r0 -; CHECK-NEXT:    ldr r0, [sp] -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    mov r1, r0 -; CHECK-NEXT:    mov r0, r4 -; CHECK-NEXT:    add sp, #8 -; CHECK-NEXT:    pop {r4, pc} +; GNU-LABEL: test_sincos_f16: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r4, lr} +; GNU-NEXT:    sub sp, #8 +; GNU-NEXT:    bl __gnu_h2f_ieee +; GNU-NEXT:    add r1, sp, #4 +; GNU-NEXT:    mov r2, sp +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    ldr r0, [sp, #4] +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    mov r4, r0 +; GNU-NEXT:    ldr r0, [sp] +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    mov r1, r0 +; GNU-NEXT:    mov r0, r4 +; GNU-NEXT:    add sp, #8 +; GNU-NEXT:    pop {r4, pc} +; +; GNUEABI-LABEL: test_sincos_f16: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r4, lr} +; GNUEABI-NEXT:    push {r4, lr} +; GNUEABI-NEXT:    .pad #8 +; GNUEABI-NEXT:    sub sp, sp, #8 +; GNUEABI-NEXT:    bl __gnu_h2f_ieee +; GNUEABI-NEXT:    add r1, sp, #4 +; GNUEABI-NEXT:    mov r2, sp +; GNUEABI-NEXT:    bl sincosf +; GNUEABI-NEXT:    ldr r0, [sp, #4] +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    mov r4, r0 +; GNUEABI-NEXT:    ldr r0, [sp] +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    mov r1, r0 +; GNUEABI-NEXT:    mov r0, r4 +; GNUEABI-NEXT:    add sp, sp, #8 +; GNUEABI-NEXT:    pop {r4, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_f16: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {r4, r5, lr} +; IOS-NO-STRET-NEXT:    bl ___extendhfsf2 +; IOS-NO-STRET-NEXT:    mov r4, r0 +; IOS-NO-STRET-NEXT:    bl _sinf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    mov r5, r0 +; IOS-NO-STRET-NEXT:    mov r0, r4 +; 
IOS-NO-STRET-NEXT:    bl _cosf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    mov r1, r0 +; IOS-NO-STRET-NEXT:    mov r0, r5 +; IOS-NO-STRET-NEXT:    pop {r4, r5, pc} +; +; IOS-WITH-STRET-LABEL: test_sincos_f16: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {r4, r5, lr} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #8 +; IOS-WITH-STRET-NEXT:    bl ___extendhfsf2 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; IOS-WITH-STRET-NEXT:    ldm sp, {r0, r4} +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; IOS-WITH-STRET-NEXT:    mov r5, r0 +; IOS-WITH-STRET-NEXT:    mov r0, r4 +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    mov r0, r5 +; IOS-WITH-STRET-NEXT:    add sp, sp, #8 +; IOS-WITH-STRET-NEXT:    pop {r4, r5, pc} +; +; WATCHABI-LABEL: test_sincos_f16: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 16 +; WATCHABI-NEXT:    vcvtb.f32.f16 s0, s0 +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    vcvtb.f16.f32 s0, s0 +; WATCHABI-NEXT:    vcvtb.f16.f32 s1, s1 +; WATCHABI-NEXT:    add sp, #8 +; WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { half, half } @llvm.sincos.f16(half %a)    ret { half, half } %result  }  define half @test_sincos_f16_only_use_sin(half %a) { -; CHECK-LABEL: test_sincos_f16_only_use_sin: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r7, lr} -; CHECK-NEXT:    sub sp, #8 -; CHECK-NEXT:    bl __gnu_h2f_ieee -; CHECK-NEXT:    add r1, sp, #4 -; CHECK-NEXT:    mov r2, sp -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    ldr r0, [sp, #4] -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    add sp, #8 -; CHECK-NEXT:    pop {r7, pc} +; GNU-LABEL: test_sincos_f16_only_use_sin: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r7, lr} +; GNU-NEXT:    sub sp, #8 +; GNU-NEXT:    bl __gnu_h2f_ieee +; GNU-NEXT:    add r1, sp, #4 +; GNU-NEXT:    mov r2, sp +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    ldr r0, [sp, #4] +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    add sp, #8 +; GNU-NEXT:    pop {r7, pc} +; +; GNUEABI-LABEL: test_sincos_f16_only_use_sin: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r11, lr} +; GNUEABI-NEXT:    push {r11, lr} +; GNUEABI-NEXT:    .pad #8 +; GNUEABI-NEXT:    sub sp, sp, #8 +; GNUEABI-NEXT:    bl __gnu_h2f_ieee +; GNUEABI-NEXT:    add r1, sp, #4 +; GNUEABI-NEXT:    mov r2, sp +; GNUEABI-NEXT:    bl sincosf +; GNUEABI-NEXT:    ldr r0, [sp, #4] +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    add sp, sp, #8 +; GNUEABI-NEXT:    pop {r11, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_f16_only_use_sin: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {lr} +; IOS-NO-STRET-NEXT:    bl ___extendhfsf2 +; IOS-NO-STRET-NEXT:    bl _sinf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    pop {lr} +; IOS-NO-STRET-NEXT:    bx lr +; +; IOS-WITH-STRET-LABEL: test_sincos_f16_only_use_sin: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {lr} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #8 +; IOS-WITH-STRET-NEXT:    bl ___extendhfsf2 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; 
IOS-WITH-STRET-NEXT:    ldr r0, [sp] +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; IOS-WITH-STRET-NEXT:    add sp, sp, #8 +; IOS-WITH-STRET-NEXT:    pop {lr} +; IOS-WITH-STRET-NEXT:    bx lr +; +; WATCHABI-LABEL: test_sincos_f16_only_use_sin: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 16 +; WATCHABI-NEXT:    vcvtb.f32.f16 s0, s0 +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    vcvtb.f16.f32 s0, s0 +; WATCHABI-NEXT:    add sp, #8 +; WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { half, half } @llvm.sincos.f16(half %a)    %result.0 = extractvalue { half, half } %result, 0    ret half %result.0  }  define half @test_sincos_f16_only_use_cos(half %a) { -; CHECK-LABEL: test_sincos_f16_only_use_cos: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r7, lr} -; CHECK-NEXT:    sub sp, #8 -; CHECK-NEXT:    bl __gnu_h2f_ieee -; CHECK-NEXT:    add r1, sp, #4 -; CHECK-NEXT:    mov r2, sp -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    ldr r0, [sp] -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    add sp, #8 -; CHECK-NEXT:    pop {r7, pc} +; GNU-LABEL: test_sincos_f16_only_use_cos: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r7, lr} +; GNU-NEXT:    sub sp, #8 +; GNU-NEXT:    bl __gnu_h2f_ieee +; GNU-NEXT:    add r1, sp, #4 +; GNU-NEXT:    mov r2, sp +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    ldr r0, [sp] +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    add sp, #8 +; GNU-NEXT:    pop {r7, pc} +; +; GNUEABI-LABEL: test_sincos_f16_only_use_cos: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r11, lr} +; GNUEABI-NEXT:    push {r11, lr} +; GNUEABI-NEXT:    .pad #8 +; GNUEABI-NEXT:    sub sp, sp, #8 +; GNUEABI-NEXT:    bl __gnu_h2f_ieee +; GNUEABI-NEXT:    add r1, sp, #4 +; GNUEABI-NEXT:    mov r2, sp +; GNUEABI-NEXT:    bl sincosf +; GNUEABI-NEXT:    ldr r0, [sp] +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    add sp, sp, #8 +; GNUEABI-NEXT:    pop {r11, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_f16_only_use_cos: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {lr} +; IOS-NO-STRET-NEXT:    bl ___extendhfsf2 +; IOS-NO-STRET-NEXT:    bl _cosf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    pop {lr} +; IOS-NO-STRET-NEXT:    bx lr +; +; IOS-WITH-STRET-LABEL: test_sincos_f16_only_use_cos: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {lr} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #8 +; IOS-WITH-STRET-NEXT:    bl ___extendhfsf2 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; IOS-WITH-STRET-NEXT:    ldr r0, [sp, #4] +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; IOS-WITH-STRET-NEXT:    add sp, sp, #8 +; IOS-WITH-STRET-NEXT:    pop {lr} +; IOS-WITH-STRET-NEXT:    bx lr +; +; WATCHABI-LABEL: test_sincos_f16_only_use_cos: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 16 +; WATCHABI-NEXT:    vcvtb.f32.f16 s0, s0 +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    vcvtb.f16.f32 s0, s1 +; WATCHABI-NEXT:    add sp, #8 +; 
WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { half, half } @llvm.sincos.f16(half %a)    %result.1 = extractvalue { half, half } %result, 1    ret half %result.1  }  define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { -; CHECK-LABEL: test_sincos_v2f16: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r4, lr} -; CHECK-NEXT:    vpush {d8} -; CHECK-NEXT:    sub sp, #24 -; CHECK-NEXT:    mov r4, r0 -; CHECK-NEXT:    mov r0, r1 -; CHECK-NEXT:    bl __gnu_h2f_ieee -; CHECK-NEXT:    add r1, sp, #12 -; CHECK-NEXT:    add r2, sp, #8 -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    mov r0, r4 -; CHECK-NEXT:    bl __gnu_h2f_ieee -; CHECK-NEXT:    add r1, sp, #4 -; CHECK-NEXT:    mov r2, sp -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    ldr r0, [sp, #12] -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    ldr r1, [sp, #4] -; CHECK-NEXT:    strh.w r0, [sp, #22] -; CHECK-NEXT:    mov r0, r1 -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    strh.w r0, [sp, #20] -; CHECK-NEXT:    add r0, sp, #20 -; CHECK-NEXT:    vld1.32 {d8[0]}, [r0:32] -; CHECK-NEXT:    ldr r0, [sp, #8] -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    ldr r1, [sp] -; CHECK-NEXT:    strh.w r0, [sp, #18] -; CHECK-NEXT:    mov r0, r1 -; CHECK-NEXT:    bl __gnu_f2h_ieee -; CHECK-NEXT:    strh.w r0, [sp, #16] -; CHECK-NEXT:    add r0, sp, #16 -; CHECK-NEXT:    vmovl.u16 q9, d8 -; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32] -; CHECK-NEXT:    vmovl.u16 q8, d16 -; CHECK-NEXT:    vmov.32 r0, d18[0] -; CHECK-NEXT:    vmov.32 r1, d18[1] -; CHECK-NEXT:    vmov.32 r2, d16[0] -; CHECK-NEXT:    vmov.32 r3, d16[1] -; CHECK-NEXT:    add sp, #24 -; CHECK-NEXT:    vpop {d8} -; CHECK-NEXT:    pop {r4, pc} +; GNU-LABEL: test_sincos_v2f16: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r4, lr} +; GNU-NEXT:    vpush {d8} +; GNU-NEXT:    sub sp, #24 +; GNU-NEXT:    mov r4, r0 +; GNU-NEXT:    mov r0, r1 +; GNU-NEXT:    bl __gnu_h2f_ieee +; GNU-NEXT:    add r1, sp, #12 +; GNU-NEXT:    add r2, sp, #8 +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    mov r0, r4 +; GNU-NEXT:    bl __gnu_h2f_ieee +; GNU-NEXT:    add r1, sp, #4 +; GNU-NEXT:    mov r2, sp +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    ldr r0, [sp, #12] +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    ldr r1, [sp, #4] +; GNU-NEXT:    strh.w r0, [sp, #22] +; GNU-NEXT:    mov r0, r1 +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    strh.w r0, [sp, #20] +; GNU-NEXT:    add r0, sp, #20 +; GNU-NEXT:    vld1.32 {d8[0]}, [r0:32] +; GNU-NEXT:    ldr r0, [sp, #8] +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    ldr r1, [sp] +; GNU-NEXT:    strh.w r0, [sp, #18] +; GNU-NEXT:    mov r0, r1 +; GNU-NEXT:    bl __gnu_f2h_ieee +; GNU-NEXT:    strh.w r0, [sp, #16] +; GNU-NEXT:    add r0, sp, #16 +; GNU-NEXT:    vmovl.u16 q9, d8 +; GNU-NEXT:    vld1.32 {d16[0]}, [r0:32] +; GNU-NEXT:    vmovl.u16 q8, d16 +; GNU-NEXT:    vmov.32 r0, d18[0] +; GNU-NEXT:    vmov.32 r1, d18[1] +; GNU-NEXT:    vmov.32 r2, d16[0] +; GNU-NEXT:    vmov.32 r3, d16[1] +; GNU-NEXT:    add sp, #24 +; GNU-NEXT:    vpop {d8} +; GNU-NEXT:    pop {r4, pc} +; +; GNUEABI-LABEL: test_sincos_v2f16: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r4, lr} +; GNUEABI-NEXT:    push {r4, lr} +; GNUEABI-NEXT:    .vsave {d8} +; GNUEABI-NEXT:    vpush {d8} +; GNUEABI-NEXT:    .pad #24 +; GNUEABI-NEXT:    sub sp, sp, #24 +; GNUEABI-NEXT:    mov r4, r0 +; GNUEABI-NEXT:    mov r0, r1 +; GNUEABI-NEXT:    bl __gnu_h2f_ieee +; GNUEABI-NEXT:    add r1, sp, #12 +; GNUEABI-NEXT:    add r2, sp, #8 +; GNUEABI-NEXT:    
bl sincosf +; GNUEABI-NEXT:    mov r0, r4 +; GNUEABI-NEXT:    bl __gnu_h2f_ieee +; GNUEABI-NEXT:    add r1, sp, #4 +; GNUEABI-NEXT:    mov r2, sp +; GNUEABI-NEXT:    bl sincosf +; GNUEABI-NEXT:    ldr r0, [sp, #12] +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    ldr r1, [sp, #4] +; GNUEABI-NEXT:    strh r0, [sp, #22] +; GNUEABI-NEXT:    mov r0, r1 +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    strh r0, [sp, #20] +; GNUEABI-NEXT:    add r0, sp, #20 +; GNUEABI-NEXT:    vld1.32 {d8[0]}, [r0:32] +; GNUEABI-NEXT:    ldr r0, [sp, #8] +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    ldr r1, [sp] +; GNUEABI-NEXT:    strh r0, [sp, #18] +; GNUEABI-NEXT:    mov r0, r1 +; GNUEABI-NEXT:    bl __gnu_f2h_ieee +; GNUEABI-NEXT:    strh r0, [sp, #16] +; GNUEABI-NEXT:    add r0, sp, #16 +; GNUEABI-NEXT:    vmovl.u16 q9, d8 +; GNUEABI-NEXT:    vld1.32 {d16[0]}, [r0:32] +; GNUEABI-NEXT:    vmovl.u16 q8, d16 +; GNUEABI-NEXT:    vmov.32 r0, d18[0] +; GNUEABI-NEXT:    vmov.32 r1, d18[1] +; GNUEABI-NEXT:    vmov.32 r2, d16[0] +; GNUEABI-NEXT:    vmov.32 r3, d16[1] +; GNUEABI-NEXT:    add sp, sp, #24 +; GNUEABI-NEXT:    vpop {d8} +; GNUEABI-NEXT:    pop {r4, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_v2f16: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {r4, r5, lr} +; IOS-NO-STRET-NEXT:    vpush {d8} +; IOS-NO-STRET-NEXT:    sub sp, sp, #8 +; IOS-NO-STRET-NEXT:    mov r5, r0 +; IOS-NO-STRET-NEXT:    mov r0, r1 +; IOS-NO-STRET-NEXT:    bl ___extendhfsf2 +; IOS-NO-STRET-NEXT:    mov r4, r0 +; IOS-NO-STRET-NEXT:    bl _sinf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    strh r0, [sp, #6] +; IOS-NO-STRET-NEXT:    mov r0, r5 +; IOS-NO-STRET-NEXT:    bl ___extendhfsf2 +; IOS-NO-STRET-NEXT:    mov r5, r0 +; IOS-NO-STRET-NEXT:    bl _sinf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    strh r0, [sp, #4] +; IOS-NO-STRET-NEXT:    add r0, sp, #4 +; IOS-NO-STRET-NEXT:    vld1.32 {d8[0]}, [r0:32] +; IOS-NO-STRET-NEXT:    mov r0, r4 +; IOS-NO-STRET-NEXT:    bl _cosf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    strh r0, [sp, #2] +; IOS-NO-STRET-NEXT:    mov r0, r5 +; IOS-NO-STRET-NEXT:    bl _cosf +; IOS-NO-STRET-NEXT:    bl ___truncsfhf2 +; IOS-NO-STRET-NEXT:    strh r0, [sp] +; IOS-NO-STRET-NEXT:    mov r0, sp +; IOS-NO-STRET-NEXT:    vld1.32 {d16[0]}, [r0:32] +; IOS-NO-STRET-NEXT:    vmovl.u16 q9, d8 +; IOS-NO-STRET-NEXT:    vmovl.u16 q8, d16 +; IOS-NO-STRET-NEXT:    vmov.32 r0, d18[0] +; IOS-NO-STRET-NEXT:    vmov.32 r1, d18[1] +; IOS-NO-STRET-NEXT:    vmov.32 r2, d16[0] +; IOS-NO-STRET-NEXT:    vmov.32 r3, d16[1] +; IOS-NO-STRET-NEXT:    add sp, sp, #8 +; IOS-NO-STRET-NEXT:    vpop {d8} +; IOS-NO-STRET-NEXT:    pop {r4, r5, pc} +; +; IOS-WITH-STRET-LABEL: test_sincos_v2f16: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {r4, r5, lr} +; IOS-WITH-STRET-NEXT:    vpush {d8} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #24 +; IOS-WITH-STRET-NEXT:    mov r4, r0 +; IOS-WITH-STRET-NEXT:    mov r0, r1 +; IOS-WITH-STRET-NEXT:    bl ___extendhfsf2 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    add r0, sp, #8 +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; IOS-WITH-STRET-NEXT:    mov r0, r4 +; IOS-WITH-STRET-NEXT:    bl ___extendhfsf2 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; IOS-WITH-STRET-NEXT:    ldr r0, [sp, #8] +; IOS-WITH-STRET-NEXT:    ldr r4, [sp, #12] +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; 
IOS-WITH-STRET-NEXT:    ldm sp, {r1, r5} +; IOS-WITH-STRET-NEXT:    strh r0, [sp, #22] +; IOS-WITH-STRET-NEXT:    mov r0, r1 +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; IOS-WITH-STRET-NEXT:    strh r0, [sp, #20] +; IOS-WITH-STRET-NEXT:    add r0, sp, #20 +; IOS-WITH-STRET-NEXT:    vld1.32 {d8[0]}, [r0:32] +; IOS-WITH-STRET-NEXT:    mov r0, r4 +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; IOS-WITH-STRET-NEXT:    strh r0, [sp, #18] +; IOS-WITH-STRET-NEXT:    mov r0, r5 +; IOS-WITH-STRET-NEXT:    bl ___truncsfhf2 +; IOS-WITH-STRET-NEXT:    strh r0, [sp, #16] +; IOS-WITH-STRET-NEXT:    add r0, sp, #16 +; IOS-WITH-STRET-NEXT:    vmovl.u16 q9, d8 +; IOS-WITH-STRET-NEXT:    vld1.32 {d16[0]}, [r0:32] +; IOS-WITH-STRET-NEXT:    vmovl.u16 q8, d16 +; IOS-WITH-STRET-NEXT:    vmov.32 r0, d18[0] +; IOS-WITH-STRET-NEXT:    vmov.32 r1, d18[1] +; IOS-WITH-STRET-NEXT:    vmov.32 r2, d16[0] +; IOS-WITH-STRET-NEXT:    vmov.32 r3, d16[1] +; IOS-WITH-STRET-NEXT:    add sp, sp, #24 +; IOS-WITH-STRET-NEXT:    vpop {d8} +; IOS-WITH-STRET-NEXT:    pop {r4, r5, pc} +; +; WATCHABI-LABEL: test_sincos_v2f16: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    vpush {d10} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 16 +; WATCHABI-NEXT:    vpush {d8} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 24 +; WATCHABI-NEXT:    .cfi_offset d10, -16 +; WATCHABI-NEXT:    .cfi_offset d8, -24 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 32 +; WATCHABI-NEXT:    vmov.f32 s16, s0 +; WATCHABI-NEXT:    vcvtb.f32.f16 s0, s1 +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    vcvtb.f16.f32 s0, s0 +; WATCHABI-NEXT:    vcvtb.f32.f16 s4, s16 +; WATCHABI-NEXT:    vmov r0, s0 +; WATCHABI-NEXT:    vmov.f32 s0, s4 +; WATCHABI-NEXT:    vmov.f32 s20, s1 +; WATCHABI-NEXT:    strh.w r0, [sp, #6] +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    vcvtb.f16.f32 s0, s0 +; WATCHABI-NEXT:    vmov r0, s0 +; WATCHABI-NEXT:    vcvtb.f16.f32 s0, s20 +; WATCHABI-NEXT:    strh.w r0, [sp, #4] +; WATCHABI-NEXT:    add r0, sp, #4 +; WATCHABI-NEXT:    vld1.32 {d16[0]}, [r0:32] +; WATCHABI-NEXT:    vmov r0, s0 +; WATCHABI-NEXT:    vcvtb.f16.f32 s0, s1 +; WATCHABI-NEXT:    strh.w r0, [sp, #2] +; WATCHABI-NEXT:    vmov r0, s0 +; WATCHABI-NEXT:    vmovl.u16 q0, d16 +; WATCHABI-NEXT:    strh.w r0, [sp] +; WATCHABI-NEXT:    mov r0, sp +; WATCHABI-NEXT:    vld1.32 {d18[0]}, [r0:32] +; WATCHABI-NEXT:    vmovl.u16 q1, d18 +; WATCHABI-NEXT:    vmov.f32 s2, s4 +; WATCHABI-NEXT:    vmov.f32 s3, s5 +; WATCHABI-NEXT:    add sp, #8 +; WATCHABI-NEXT:    vpop {d8} +; WATCHABI-NEXT:    vpop {d10} +; WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)    ret { <2 x half>, <2 x half> } %result  }  define { float, float } @test_sincos_f32(float %a) { -; CHECK-LABEL: test_sincos_f32: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r7, lr} -; CHECK-NEXT:    sub sp, #8 -; CHECK-NEXT:    add r1, sp, #4 -; CHECK-NEXT:    mov r2, sp -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    ldrd r1, r0, [sp], #8 -; CHECK-NEXT:    pop {r7, pc} +; GNU-LABEL: test_sincos_f32: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r7, lr} +; GNU-NEXT:    sub sp, #8 +; GNU-NEXT:    add r1, sp, #4 +; GNU-NEXT:    mov r2, sp +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    ldrd r1, r0, [sp], #8 +; 
GNU-NEXT:    pop {r7, pc} +; +; GNUEABI-LABEL: test_sincos_f32: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r11, lr} +; GNUEABI-NEXT:    push {r11, lr} +; GNUEABI-NEXT:    .pad #8 +; GNUEABI-NEXT:    sub sp, sp, #8 +; GNUEABI-NEXT:    add r1, sp, #4 +; GNUEABI-NEXT:    mov r2, sp +; GNUEABI-NEXT:    bl sincosf +; GNUEABI-NEXT:    ldr r0, [sp, #4] +; GNUEABI-NEXT:    ldr r1, [sp], #8 +; GNUEABI-NEXT:    pop {r11, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_f32: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {r4, r5, lr} +; IOS-NO-STRET-NEXT:    mov r4, r0 +; IOS-NO-STRET-NEXT:    bl _sinf +; IOS-NO-STRET-NEXT:    mov r5, r0 +; IOS-NO-STRET-NEXT:    mov r0, r4 +; IOS-NO-STRET-NEXT:    bl _cosf +; IOS-NO-STRET-NEXT:    mov r1, r0 +; IOS-NO-STRET-NEXT:    mov r0, r5 +; IOS-NO-STRET-NEXT:    pop {r4, r5, pc} +; +; IOS-WITH-STRET-LABEL: test_sincos_f32: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {lr} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #8 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; IOS-WITH-STRET-NEXT:    pop {r0, r1} +; IOS-WITH-STRET-NEXT:    pop {lr} +; IOS-WITH-STRET-NEXT:    bx lr +; +; WATCHABI-LABEL: test_sincos_f32: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 16 +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    add sp, #8 +; WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { float, float } @llvm.sincos.f32(float %a)    ret { float, float } %result  }  define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { -; CHECK-LABEL: test_sincos_v2f32: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r7, lr} -; CHECK-NEXT:    vpush {d8} -; CHECK-NEXT:    sub sp, #16 -; CHECK-NEXT:    vmov d8, r0, r1 -; CHECK-NEXT:    add r1, sp, #4 -; CHECK-NEXT:    mov r2, sp -; CHECK-NEXT:    vmov r0, s17 -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    vmov r0, s16 -; CHECK-NEXT:    add r1, sp, #12 -; CHECK-NEXT:    add r2, sp, #8 -; CHECK-NEXT:    bl sincosf -; CHECK-NEXT:    vldr s1, [sp, #4] -; CHECK-NEXT:    vldr s3, [sp] -; CHECK-NEXT:    vldr s0, [sp, #12] -; CHECK-NEXT:    vldr s2, [sp, #8] -; CHECK-NEXT:    vmov r0, r1, d0 -; CHECK-NEXT:    vmov r2, r3, d1 -; CHECK-NEXT:    add sp, #16 -; CHECK-NEXT:    vpop {d8} -; CHECK-NEXT:    pop {r7, pc} +; GNU-LABEL: test_sincos_v2f32: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r7, lr} +; GNU-NEXT:    vpush {d8} +; GNU-NEXT:    sub sp, #16 +; GNU-NEXT:    vmov d8, r0, r1 +; GNU-NEXT:    add r1, sp, #4 +; GNU-NEXT:    mov r2, sp +; GNU-NEXT:    vmov r0, s17 +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    vmov r0, s16 +; GNU-NEXT:    add r1, sp, #12 +; GNU-NEXT:    add r2, sp, #8 +; GNU-NEXT:    bl sincosf +; GNU-NEXT:    vldr s1, [sp, #4] +; GNU-NEXT:    vldr s3, [sp] +; GNU-NEXT:    vldr s0, [sp, #12] +; GNU-NEXT:    vldr s2, [sp, #8] +; GNU-NEXT:    vmov r0, r1, d0 +; GNU-NEXT:    vmov r2, r3, d1 +; GNU-NEXT:    add sp, #16 +; GNU-NEXT:    vpop {d8} +; GNU-NEXT:    pop {r7, pc} +; +; GNUEABI-LABEL: test_sincos_v2f32: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r11, lr} +; GNUEABI-NEXT:    push {r11, lr} +; GNUEABI-NEXT:    .vsave {d8} +; GNUEABI-NEXT:    vpush {d8} +; GNUEABI-NEXT:    .pad #16 +; GNUEABI-NEXT:    sub sp, 
sp, #16 +; GNUEABI-NEXT:    vmov d8, r0, r1 +; GNUEABI-NEXT:    add r1, sp, #4 +; GNUEABI-NEXT:    mov r2, sp +; GNUEABI-NEXT:    vmov r0, s17 +; GNUEABI-NEXT:    bl sincosf +; GNUEABI-NEXT:    vmov r0, s16 +; GNUEABI-NEXT:    add r1, sp, #12 +; GNUEABI-NEXT:    add r2, sp, #8 +; GNUEABI-NEXT:    bl sincosf +; GNUEABI-NEXT:    vldr s1, [sp, #4] +; GNUEABI-NEXT:    vldr s3, [sp] +; GNUEABI-NEXT:    vldr s0, [sp, #12] +; GNUEABI-NEXT:    vldr s2, [sp, #8] +; GNUEABI-NEXT:    vmov r0, r1, d0 +; GNUEABI-NEXT:    vmov r2, r3, d1 +; GNUEABI-NEXT:    add sp, sp, #16 +; GNUEABI-NEXT:    vpop {d8} +; GNUEABI-NEXT:    pop {r11, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_v2f32: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {r4, r5, r6, r7, lr} +; IOS-NO-STRET-NEXT:    vpush {d8} +; IOS-NO-STRET-NEXT:    vmov d8, r0, r1 +; IOS-NO-STRET-NEXT:    vmov r4, s17 +; IOS-NO-STRET-NEXT:    mov r0, r4 +; IOS-NO-STRET-NEXT:    bl _sinf +; IOS-NO-STRET-NEXT:    mov r5, r0 +; IOS-NO-STRET-NEXT:    mov r0, r4 +; IOS-NO-STRET-NEXT:    bl _cosf +; IOS-NO-STRET-NEXT:    vmov r6, s16 +; IOS-NO-STRET-NEXT:    mov r4, r0 +; IOS-NO-STRET-NEXT:    mov r0, r6 +; IOS-NO-STRET-NEXT:    bl _sinf +; IOS-NO-STRET-NEXT:    mov r7, r0 +; IOS-NO-STRET-NEXT:    mov r0, r6 +; IOS-NO-STRET-NEXT:    bl _cosf +; IOS-NO-STRET-NEXT:    mov r2, r0 +; IOS-NO-STRET-NEXT:    mov r0, r7 +; IOS-NO-STRET-NEXT:    mov r1, r5 +; IOS-NO-STRET-NEXT:    mov r3, r4 +; IOS-NO-STRET-NEXT:    vpop {d8} +; IOS-NO-STRET-NEXT:    pop {r4, r5, r6, r7, pc} +; +; IOS-WITH-STRET-LABEL: test_sincos_v2f32: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {lr} +; IOS-WITH-STRET-NEXT:    vpush {d8} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #16 +; IOS-WITH-STRET-NEXT:    vmov d8, r0, r1 +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    vmov r1, s17 +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; IOS-WITH-STRET-NEXT:    vmov r1, s16 +; IOS-WITH-STRET-NEXT:    add r0, sp, #8 +; IOS-WITH-STRET-NEXT:    bl ___sincosf_stret +; IOS-WITH-STRET-NEXT:    vldr s1, [sp] +; IOS-WITH-STRET-NEXT:    vldr s3, [sp, #4] +; IOS-WITH-STRET-NEXT:    vldr s0, [sp, #8] +; IOS-WITH-STRET-NEXT:    vldr s2, [sp, #12] +; IOS-WITH-STRET-NEXT:    vmov r0, r1, d0 +; IOS-WITH-STRET-NEXT:    vmov r2, r3, d1 +; IOS-WITH-STRET-NEXT:    add sp, sp, #16 +; IOS-WITH-STRET-NEXT:    vpop {d8} +; IOS-WITH-STRET-NEXT:    pop {lr} +; IOS-WITH-STRET-NEXT:    bx lr +; +; WATCHABI-LABEL: test_sincos_v2f32: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    vpush {d8, d9, d10} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 32 +; WATCHABI-NEXT:    .cfi_offset d10, -16 +; WATCHABI-NEXT:    .cfi_offset d9, -24 +; WATCHABI-NEXT:    .cfi_offset d8, -32 +; WATCHABI-NEXT:    vmov.f64 d8, d0 +; WATCHABI-NEXT:    vmov.f32 s0, s17 +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    vmov.f32 s19, s0 +; WATCHABI-NEXT:    vmov.f32 s0, s16 +; WATCHABI-NEXT:    vmov.f32 s21, s1 +; WATCHABI-NEXT:    bl ___sincosf_stret +; WATCHABI-NEXT:    vmov.f32 s20, s1 +; WATCHABI-NEXT:    vmov.f32 s18, s0 +; WATCHABI-NEXT:    vmov.f64 d1, d10 +; WATCHABI-NEXT:    vmov.f64 d0, d9 +; WATCHABI-NEXT:    vpop {d8, d9, d10} +; WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)    ret { <2 x float>, <2 x float> 
} %result  }  define { double, double } @test_sincos_f64(double %a) { -; CHECK-LABEL: test_sincos_f64: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r7, lr} -; CHECK-NEXT:    sub sp, #16 -; CHECK-NEXT:    add r2, sp, #8 -; CHECK-NEXT:    mov r3, sp -; CHECK-NEXT:    bl sincos -; CHECK-NEXT:    ldrd r0, r1, [sp, #8] -; CHECK-NEXT:    ldrd r2, r3, [sp], #16 -; CHECK-NEXT:    pop {r7, pc} +; GNU-LABEL: test_sincos_f64: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r7, lr} +; GNU-NEXT:    sub sp, #16 +; GNU-NEXT:    add r2, sp, #8 +; GNU-NEXT:    mov r3, sp +; GNU-NEXT:    bl sincos +; GNU-NEXT:    ldrd r0, r1, [sp, #8] +; GNU-NEXT:    ldrd r2, r3, [sp], #16 +; GNU-NEXT:    pop {r7, pc} +; +; GNUEABI-LABEL: test_sincos_f64: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r11, lr} +; GNUEABI-NEXT:    push {r11, lr} +; GNUEABI-NEXT:    .pad #16 +; GNUEABI-NEXT:    sub sp, sp, #16 +; GNUEABI-NEXT:    add r2, sp, #8 +; GNUEABI-NEXT:    mov r3, sp +; GNUEABI-NEXT:    bl sincos +; GNUEABI-NEXT:    ldm sp, {r2, r3} +; GNUEABI-NEXT:    ldr r0, [sp, #8] +; GNUEABI-NEXT:    ldr r1, [sp, #12] +; GNUEABI-NEXT:    add sp, sp, #16 +; GNUEABI-NEXT:    pop {r11, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_f64: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {r4, r5, r6, r7, lr} +; IOS-NO-STRET-NEXT:    mov r4, r1 +; IOS-NO-STRET-NEXT:    mov r5, r0 +; IOS-NO-STRET-NEXT:    bl _sin +; IOS-NO-STRET-NEXT:    mov r6, r0 +; IOS-NO-STRET-NEXT:    mov r7, r1 +; IOS-NO-STRET-NEXT:    mov r0, r5 +; IOS-NO-STRET-NEXT:    mov r1, r4 +; IOS-NO-STRET-NEXT:    bl _cos +; IOS-NO-STRET-NEXT:    mov r2, r0 +; IOS-NO-STRET-NEXT:    mov r3, r1 +; IOS-NO-STRET-NEXT:    mov r0, r6 +; IOS-NO-STRET-NEXT:    mov r1, r7 +; IOS-NO-STRET-NEXT:    pop {r4, r5, r6, r7, pc} +; +; IOS-WITH-STRET-LABEL: test_sincos_f64: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {lr} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #16 +; IOS-WITH-STRET-NEXT:    mov r2, r1 +; IOS-WITH-STRET-NEXT:    mov r1, r0 +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    bl ___sincos_stret +; IOS-WITH-STRET-NEXT:    vldr d16, [sp, #8] +; IOS-WITH-STRET-NEXT:    ldm sp, {r0, r1} +; IOS-WITH-STRET-NEXT:    vmov r2, r3, d16 +; IOS-WITH-STRET-NEXT:    add sp, sp, #16 +; IOS-WITH-STRET-NEXT:    pop {lr} +; IOS-WITH-STRET-NEXT:    bx lr +; +; WATCHABI-LABEL: test_sincos_f64: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 16 +; WATCHABI-NEXT:    bl ___sincos_stret +; WATCHABI-NEXT:    add sp, #8 +; WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { double, double } @llvm.sincos.f64(double %a)    ret { double, double } %result  }  define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { -; CHECK-LABEL: test_sincos_v2f64: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r4, lr} -; CHECK-NEXT:    sub sp, #32 -; CHECK-NEXT:    mov r1, r3 -; CHECK-NEXT:    mov r12, r2 -; CHECK-NEXT:    add r2, sp, #24 -; CHECK-NEXT:    add r3, sp, #16 -; CHECK-NEXT:    mov r4, r0 -; CHECK-NEXT:    mov r0, r12 -; CHECK-NEXT:    bl sincos -; CHECK-NEXT:    ldrd r0, r1, [sp, #40] -; CHECK-NEXT:    add r2, sp, #8 -; CHECK-NEXT:    mov r3, sp -; CHECK-NEXT:    bl sincos -; CHECK-NEXT:    vldr d19, [sp, #8] -; CHECK-NEXT:    vldr d18, [sp, #24] -; CHECK-NEXT:   
 vldr d17, [sp] -; CHECK-NEXT:    vldr d16, [sp, #16] -; CHECK-NEXT:    vst1.64 {d18, d19}, [r4]! -; CHECK-NEXT:    vst1.64 {d16, d17}, [r4] -; CHECK-NEXT:    add sp, #32 -; CHECK-NEXT:    pop {r4, pc} +; GNU-LABEL: test_sincos_v2f64: +; GNU:       @ %bb.0: +; GNU-NEXT:    push {r4, lr} +; GNU-NEXT:    sub sp, #32 +; GNU-NEXT:    mov r1, r3 +; GNU-NEXT:    mov r12, r2 +; GNU-NEXT:    add r2, sp, #24 +; GNU-NEXT:    add r3, sp, #16 +; GNU-NEXT:    mov r4, r0 +; GNU-NEXT:    mov r0, r12 +; GNU-NEXT:    bl sincos +; GNU-NEXT:    ldrd r0, r1, [sp, #40] +; GNU-NEXT:    add r2, sp, #8 +; GNU-NEXT:    mov r3, sp +; GNU-NEXT:    bl sincos +; GNU-NEXT:    vldr d19, [sp, #8] +; GNU-NEXT:    vldr d18, [sp, #24] +; GNU-NEXT:    vldr d17, [sp] +; GNU-NEXT:    vldr d16, [sp, #16] +; GNU-NEXT:    vst1.64 {d18, d19}, [r4]! +; GNU-NEXT:    vst1.64 {d16, d17}, [r4] +; GNU-NEXT:    add sp, #32 +; GNU-NEXT:    pop {r4, pc} +; +; GNUEABI-LABEL: test_sincos_v2f64: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r4, lr} +; GNUEABI-NEXT:    push {r4, lr} +; GNUEABI-NEXT:    .pad #32 +; GNUEABI-NEXT:    sub sp, sp, #32 +; GNUEABI-NEXT:    mov r1, r3 +; GNUEABI-NEXT:    mov r12, r2 +; GNUEABI-NEXT:    add r2, sp, #24 +; GNUEABI-NEXT:    add r3, sp, #16 +; GNUEABI-NEXT:    mov r4, r0 +; GNUEABI-NEXT:    mov r0, r12 +; GNUEABI-NEXT:    bl sincos +; GNUEABI-NEXT:    ldr r0, [sp, #40] +; GNUEABI-NEXT:    add r2, sp, #8 +; GNUEABI-NEXT:    ldr r1, [sp, #44] +; GNUEABI-NEXT:    mov r3, sp +; GNUEABI-NEXT:    bl sincos +; GNUEABI-NEXT:    vldr d19, [sp, #8] +; GNUEABI-NEXT:    vldr d18, [sp, #24] +; GNUEABI-NEXT:    vldr d17, [sp] +; GNUEABI-NEXT:    vldr d16, [sp, #16] +; GNUEABI-NEXT:    vst1.64 {d18, d19}, [r4]! +; GNUEABI-NEXT:    vst1.64 {d16, d17}, [r4] +; GNUEABI-NEXT:    add sp, sp, #32 +; GNUEABI-NEXT:    pop {r4, pc} +; +; IOS-NO-STRET-LABEL: test_sincos_v2f64: +; IOS-NO-STRET:       @ %bb.0: +; IOS-NO-STRET-NEXT:    push {r4, r5, r6, r7, r8, r10, r11, lr} +; IOS-NO-STRET-NEXT:    vpush {d8, d9, d10, d11} +; IOS-NO-STRET-NEXT:    ldr r8, [sp, #64] +; IOS-NO-STRET-NEXT:    mov r7, r1 +; IOS-NO-STRET-NEXT:    mov r4, r0 +; IOS-NO-STRET-NEXT:    mov r0, r3 +; IOS-NO-STRET-NEXT:    mov r6, r3 +; IOS-NO-STRET-NEXT:    mov r10, r2 +; IOS-NO-STRET-NEXT:    mov r1, r8 +; IOS-NO-STRET-NEXT:    bl _sin +; IOS-NO-STRET-NEXT:    mov r11, r0 +; IOS-NO-STRET-NEXT:    mov r5, r1 +; IOS-NO-STRET-NEXT:    mov r0, r6 +; IOS-NO-STRET-NEXT:    mov r1, r8 +; IOS-NO-STRET-NEXT:    bl _cos +; IOS-NO-STRET-NEXT:    vmov d9, r0, r1 +; IOS-NO-STRET-NEXT:    mov r0, r7 +; IOS-NO-STRET-NEXT:    mov r1, r10 +; IOS-NO-STRET-NEXT:    vmov d11, r11, r5 +; IOS-NO-STRET-NEXT:    bl _sin +; IOS-NO-STRET-NEXT:    vmov d10, r0, r1 +; IOS-NO-STRET-NEXT:    mov r0, r7 +; IOS-NO-STRET-NEXT:    mov r1, r10 +; IOS-NO-STRET-NEXT:    bl _cos +; IOS-NO-STRET-NEXT:    vmov d8, r0, r1 +; IOS-NO-STRET-NEXT:    vst1.32 {d10, d11}, [r4]! 
+; IOS-NO-STRET-NEXT:    vst1.32 {d8, d9}, [r4] +; IOS-NO-STRET-NEXT:    vpop {d8, d9, d10, d11} +; IOS-NO-STRET-NEXT:    pop {r4, r5, r6, r7, r8, r10, r11, pc} +; +; IOS-WITH-STRET-LABEL: test_sincos_v2f64: +; IOS-WITH-STRET:       @ %bb.0: +; IOS-WITH-STRET-NEXT:    push {r4, r5, r6, lr} +; IOS-WITH-STRET-NEXT:    sub sp, sp, #32 +; IOS-WITH-STRET-NEXT:    mov r4, r2 +; IOS-WITH-STRET-NEXT:    ldr r2, [sp, #48] +; IOS-WITH-STRET-NEXT:    mov r6, r0 +; IOS-WITH-STRET-NEXT:    add r0, sp, #16 +; IOS-WITH-STRET-NEXT:    mov r5, r1 +; IOS-WITH-STRET-NEXT:    mov r1, r3 +; IOS-WITH-STRET-NEXT:    bl ___sincos_stret +; IOS-WITH-STRET-NEXT:    mov r0, sp +; IOS-WITH-STRET-NEXT:    mov r1, r5 +; IOS-WITH-STRET-NEXT:    mov r2, r4 +; IOS-WITH-STRET-NEXT:    bl ___sincos_stret +; IOS-WITH-STRET-NEXT:    vldr d17, [sp, #16] +; IOS-WITH-STRET-NEXT:    vldr d16, [sp] +; IOS-WITH-STRET-NEXT:    vldr d19, [sp, #24] +; IOS-WITH-STRET-NEXT:    vldr d18, [sp, #8] +; IOS-WITH-STRET-NEXT:    vst1.32 {d16, d17}, [r6]! +; IOS-WITH-STRET-NEXT:    vst1.32 {d18, d19}, [r6] +; IOS-WITH-STRET-NEXT:    add sp, sp, #32 +; IOS-WITH-STRET-NEXT:    pop {r4, r5, r6, pc} +; +; WATCHABI-LABEL: test_sincos_v2f64: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push {r7, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 8 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    vpush {d8, d9, d10, d11, d12, d13} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 56 +; WATCHABI-NEXT:    .cfi_offset d13, -16 +; WATCHABI-NEXT:    .cfi_offset d12, -24 +; WATCHABI-NEXT:    .cfi_offset d11, -32 +; WATCHABI-NEXT:    .cfi_offset d10, -40 +; WATCHABI-NEXT:    .cfi_offset d9, -48 +; WATCHABI-NEXT:    .cfi_offset d8, -56 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 64 +; WATCHABI-NEXT:    vorr q4, q0, q0 +; WATCHABI-NEXT:    vorr d0, d9, d9 +; WATCHABI-NEXT:    bl ___sincos_stret +; WATCHABI-NEXT:    vorr d11, d0, d0 +; WATCHABI-NEXT:    vorr d0, d8, d8 +; WATCHABI-NEXT:    vorr d13, d1, d1 +; WATCHABI-NEXT:    bl ___sincos_stret +; WATCHABI-NEXT:    vorr d12, d1, d1 +; WATCHABI-NEXT:    vorr d10, d0, d0 +; WATCHABI-NEXT:    vorr q1, q6, q6 +; WATCHABI-NEXT:    vorr q0, q5, q5 +; WATCHABI-NEXT:    add sp, #8 +; WATCHABI-NEXT:    vpop {d8, d9, d10, d11, d12, d13} +; WATCHABI-NEXT:    pop {r7, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)    ret { <2 x double>, <2 x double> } %result  }  define { fp128, fp128 } @test_sincos_f128(fp128 %a) { -; CHECK-LABEL: test_sincos_f128: -; CHECK:       @ %bb.0: -; CHECK-NEXT:    push {r4, r5, r7, lr} -; CHECK-NEXT:    sub sp, #40 -; CHECK-NEXT:    mov r12, r3 -; CHECK-NEXT:    ldr r3, [sp, #56] -; CHECK-NEXT:    add.w lr, sp, #8 -; CHECK-NEXT:    mov r4, r0 -; CHECK-NEXT:    add r0, sp, #24 -; CHECK-NEXT:    strd r0, lr, [sp] -; CHECK-NEXT:    mov r0, r1 -; CHECK-NEXT:    mov r1, r2 -; CHECK-NEXT:    mov r2, r12 -; CHECK-NEXT:    bl sincosl -; CHECK-NEXT:    ldrd r2, r3, [sp, #16] -; CHECK-NEXT:    ldrd r12, r1, [sp, #8] -; CHECK-NEXT:    str r3, [r4, #28] -; CHECK-NEXT:    ldrd r3, r5, [sp, #32] -; CHECK-NEXT:    ldrd lr, r0, [sp, #24] -; CHECK-NEXT:    strd r1, r2, [r4, #20] -; CHECK-NEXT:    add.w r1, r4, #8 -; CHECK-NEXT:    stm.w r1, {r3, r5, r12} -; CHECK-NEXT:    strd lr, r0, [r4] -; CHECK-NEXT:    add sp, #40 -; CHECK-NEXT:    pop {r4, r5, r7, pc} +; GNU-LABEL: test_sincos_f128: +; GNU:       @ %bb.0: +; GNU-NEXT:    push 
{r4, r5, r7, lr} +; GNU-NEXT:    sub sp, #40 +; GNU-NEXT:    mov r12, r3 +; GNU-NEXT:    ldr r3, [sp, #56] +; GNU-NEXT:    add.w lr, sp, #8 +; GNU-NEXT:    mov r4, r0 +; GNU-NEXT:    add r0, sp, #24 +; GNU-NEXT:    strd r0, lr, [sp] +; GNU-NEXT:    mov r0, r1 +; GNU-NEXT:    mov r1, r2 +; GNU-NEXT:    mov r2, r12 +; GNU-NEXT:    bl sincosl +; GNU-NEXT:    ldrd r2, r3, [sp, #16] +; GNU-NEXT:    ldrd r12, r1, [sp, #8] +; GNU-NEXT:    str r3, [r4, #28] +; GNU-NEXT:    ldrd r3, r5, [sp, #32] +; GNU-NEXT:    ldrd lr, r0, [sp, #24] +; GNU-NEXT:    strd r1, r2, [r4, #20] +; GNU-NEXT:    add.w r1, r4, #8 +; GNU-NEXT:    stm.w r1, {r3, r5, r12} +; GNU-NEXT:    strd lr, r0, [r4] +; GNU-NEXT:    add sp, #40 +; GNU-NEXT:    pop {r4, r5, r7, pc} +; +; GNUEABI-LABEL: test_sincos_f128: +; GNUEABI:       @ %bb.0: +; GNUEABI-NEXT:    .save {r4, r5, r11, lr} +; GNUEABI-NEXT:    push {r4, r5, r11, lr} +; GNUEABI-NEXT:    .pad #40 +; GNUEABI-NEXT:    sub sp, sp, #40 +; GNUEABI-NEXT:    mov r12, r3 +; GNUEABI-NEXT:    ldr r3, [sp, #56] +; GNUEABI-NEXT:    mov r4, r0 +; GNUEABI-NEXT:    add r0, sp, #24 +; GNUEABI-NEXT:    add r5, sp, #8 +; GNUEABI-NEXT:    stm sp, {r0, r5} +; GNUEABI-NEXT:    mov r0, r1 +; GNUEABI-NEXT:    mov r1, r2 +; GNUEABI-NEXT:    mov r2, r12 +; GNUEABI-NEXT:    bl sincosl +; GNUEABI-NEXT:    add r3, sp, #12 +; GNUEABI-NEXT:    ldr r12, [sp, #8] +; GNUEABI-NEXT:    ldm r3, {r1, r2, r3} +; GNUEABI-NEXT:    str r3, [r4, #28] +; GNUEABI-NEXT:    ldr r0, [sp, #32] +; GNUEABI-NEXT:    ldr lr, [sp, #24] +; GNUEABI-NEXT:    ldr r5, [sp, #28] +; GNUEABI-NEXT:    ldr r3, [sp, #36] +; GNUEABI-NEXT:    str r2, [r4, #24] +; GNUEABI-NEXT:    str r1, [r4, #20] +; GNUEABI-NEXT:    add r1, r4, #8 +; GNUEABI-NEXT:    stm r1, {r0, r3, r12} +; GNUEABI-NEXT:    str r5, [r4, #4] +; GNUEABI-NEXT:    str lr, [r4] +; GNUEABI-NEXT:    add sp, sp, #40 +; GNUEABI-NEXT:    pop {r4, r5, r11, pc} +; +; IOS-LABEL: test_sincos_f128: +; IOS:       @ %bb.0: +; IOS-NEXT:    push {r4, r5, r6, r7, r8, lr} +; IOS-NEXT:    ldr r8, [sp, #24] +; IOS-NEXT:    mov r4, r0 +; IOS-NEXT:    mov r5, r3 +; IOS-NEXT:    mov r6, r2 +; IOS-NEXT:    mov r7, r1 +; IOS-NEXT:    mov r0, r1 +; IOS-NEXT:    mov r1, r2 +; IOS-NEXT:    mov r2, r3 +; IOS-NEXT:    mov r3, r8 +; IOS-NEXT:    bl _cosl +; IOS-NEXT:    add r9, r4, #16 +; IOS-NEXT:    stm r9, {r0, r1, r2, r3} +; IOS-NEXT:    mov r0, r7 +; IOS-NEXT:    mov r1, r6 +; IOS-NEXT:    mov r2, r5 +; IOS-NEXT:    mov r3, r8 +; IOS-NEXT:    bl _sinl +; IOS-NEXT:    stm r4, {r0, r1, r2, r3} +; IOS-NEXT:    pop {r4, r5, r6, r7, r8, pc} +; +; WATCHABI-LABEL: test_sincos_f128: +; WATCHABI:         .cfi_startproc +; WATCHABI-NEXT:  @ %bb.0: +; WATCHABI-NEXT:    push.w {r4, r5, r6, r7, r8, lr} +; WATCHABI-NEXT:    .cfi_def_cfa_offset 24 +; WATCHABI-NEXT:    .cfi_offset lr, -4 +; WATCHABI-NEXT:    .cfi_offset r7, -8 +; WATCHABI-NEXT:    .cfi_offset r6, -12 +; WATCHABI-NEXT:    .cfi_offset r5, -16 +; WATCHABI-NEXT:    .cfi_offset r4, -20 +; WATCHABI-NEXT:    .cfi_offset r8, -24 +; WATCHABI-NEXT:    sub sp, #8 +; WATCHABI-NEXT:    .cfi_def_cfa_offset 32 +; WATCHABI-NEXT:    ldr.w r8, [sp, #32] +; WATCHABI-NEXT:    mov r4, r0 +; WATCHABI-NEXT:    mov r5, r3 +; WATCHABI-NEXT:    mov r6, r2 +; WATCHABI-NEXT:    mov r7, r1 +; WATCHABI-NEXT:    mov r0, r1 +; WATCHABI-NEXT:    mov r1, r2 +; WATCHABI-NEXT:    mov r2, r3 +; WATCHABI-NEXT:    mov r3, r8 +; WATCHABI-NEXT:    bl _cosl +; WATCHABI-NEXT:    add.w r9, r4, #16 +; WATCHABI-NEXT:    stm.w r9, {r0, r1, r2, r3} +; WATCHABI-NEXT:    mov r0, r7 +; 
WATCHABI-NEXT:    mov r1, r6 +; WATCHABI-NEXT:    mov r2, r5 +; WATCHABI-NEXT:    mov r3, r8 +; WATCHABI-NEXT:    bl _sinl +; WATCHABI-NEXT:    stm r4!, {r0, r1, r2, r3} +; WATCHABI-NEXT:    add sp, #8 +; WATCHABI-NEXT:    pop.w {r4, r5, r6, r7, r8, pc} +; WATCHABI-NEXT:    .cfi_endproc    %result = call { fp128, fp128 } @llvm.sincos.f16(fp128 %a)    ret { fp128, fp128 } %result  } diff --git a/llvm/test/CodeGen/BPF/bpf_trap.ll b/llvm/test/CodeGen/BPF/bpf_trap.ll new file mode 100644 index 0000000..ab8df5f --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf_trap.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s | FileCheck %s +; +target triple = "bpf" + +define i32 @test(i8 %x) { +entry: +  %0 = and i8 %x, 3 +  switch i8 %0, label %default.unreachable4 [ +    i8 0, label %return +    i8 1, label %sw.bb1 +    i8 2, label %sw.bb2 +    i8 3, label %sw.bb3 +  ] + +sw.bb1:                                           ; preds = %entry +  br label %return + +sw.bb2:                                           ; preds = %entry +  br label %return + +sw.bb3:                                           ; preds = %entry +  br label %return + +default.unreachable4:                             ; preds = %entry +  unreachable + +return:                                           ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1 +  %retval.0 = phi i32 [ 12, %sw.bb1 ], [ 43, %sw.bb2 ], [ 54, %sw.bb3 ], [ 32, %entry ] +  ret i32 %retval.0 +} + +; CHECK-NOT: __bpf_trap diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll index 48ec98c..8e08e1e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll @@ -5,40 +5,10 @@  define void @minnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {  ; CHECK-LABEL: minnum_v8f32:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a2, 0 -; CHECK-NEXT:    xvld $xr1, $a1, 0 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 5 -; CHECK-NEXT:    xvpickve.w $xr3, $xr1, 5 -; CHECK-NEXT:    fmin.s $fa2, $fa3, $fa2 -; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 4 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 4 -; CHECK-NEXT:    fmin.s $fa3, $fa4, $fa3 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 6 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 6 -; CHECK-NEXT:    fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 7 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 7 -; CHECK-NEXT:    fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 48 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 1 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 1 -; CHECK-NEXT:    fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    xvpickve.w $xr4, $xr0, 0 -; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 0 -; CHECK-NEXT:    fmin.s $fa4, $fa5, $fa4 -; CHECK-NEXT:    vextrins.w $vr4, $vr2, 16 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 2 -; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 2 -; CHECK-NEXT:    fmin.s $fa2, $fa5, $fa2 -; CHECK-NEXT:    vextrins.w $vr4, $vr2, 32 -; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT:    xvpickve.w $xr1, $xr1, 3 -; CHECK-NEXT:    fmin.s $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.w $vr4, $vr0, 48 -; CHECK-NEXT:    xvpermi.q $xr4, $xr3, 2 -; CHECK-NEXT:    xvst $xr4, $a0, 0 +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvfmin.s $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry:    %v0 = load <8 x float>, ptr %x @@ -51,23 +21,9 @@ entry:  define void @minnum_v4f64(ptr %res, ptr 
%x, ptr %y) nounwind {  ; CHECK-LABEL: minnum_v4f64:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a2, 0 -; CHECK-NEXT:    xvld $xr1, $a1, 0 -; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 3 -; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3 -; CHECK-NEXT:    fmin.d $fa2, $fa3, $fa2 -; CHECK-NEXT:    xvpickve.d $xr3, $xr0, 2 -; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 2 -; CHECK-NEXT:    fmin.d $fa3, $fa4, $fa3 -; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16 -; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 1 -; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 1 -; CHECK-NEXT:    fmin.d $fa2, $fa4, $fa2 -; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 0 -; CHECK-NEXT:    fmin.d $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.d $vr0, $vr2, 16 -; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2 +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvfmin.d $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -81,40 +37,10 @@ entry:  define void @maxnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {  ; CHECK-LABEL: maxnum_v8f32:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a2, 0 -; CHECK-NEXT:    xvld $xr1, $a1, 0 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 5 -; CHECK-NEXT:    xvpickve.w $xr3, $xr1, 5 -; CHECK-NEXT:    fmax.s $fa2, $fa3, $fa2 -; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 4 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 4 -; CHECK-NEXT:    fmax.s $fa3, $fa4, $fa3 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 6 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 6 -; CHECK-NEXT:    fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 7 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 7 -; CHECK-NEXT:    fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 48 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 1 -; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 1 -; CHECK-NEXT:    fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    xvpickve.w $xr4, $xr0, 0 -; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 0 -; CHECK-NEXT:    fmax.s $fa4, $fa5, $fa4 -; CHECK-NEXT:    vextrins.w $vr4, $vr2, 16 -; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 2 -; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 2 -; CHECK-NEXT:    fmax.s $fa2, $fa5, $fa2 -; CHECK-NEXT:    vextrins.w $vr4, $vr2, 32 -; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT:    xvpickve.w $xr1, $xr1, 3 -; CHECK-NEXT:    fmax.s $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.w $vr4, $vr0, 48 -; CHECK-NEXT:    xvpermi.q $xr4, $xr3, 2 -; CHECK-NEXT:    xvst $xr4, $a0, 0 +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvfmax.s $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry:    %v0 = load <8 x float>, ptr %x @@ -127,23 +53,9 @@ entry:  define void @maxnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind {  ; CHECK-LABEL: maxnum_v4f64:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a2, 0 -; CHECK-NEXT:    xvld $xr1, $a1, 0 -; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 3 -; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3 -; CHECK-NEXT:    fmax.d $fa2, $fa3, $fa2 -; CHECK-NEXT:    xvpickve.d $xr3, $xr0, 2 -; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 2 -; CHECK-NEXT:    fmax.d $fa3, $fa4, $fa3 -; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16 -; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 1 -; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 1 -; CHECK-NEXT:    fmax.d $fa2, $fa4, $fa2 -; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 0 -; CHECK-NEXT:    
fmax.d $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.d $vr0, $vr2, 16 -; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2 +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvfmax.d $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll index 27ecb75..c173092 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll @@ -5,24 +5,10 @@  define void @minnum_v4f32(ptr %res, ptr %x, ptr %y) nounwind {  ; CHECK-LABEL: minnum_v4f32:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a2, 0 -; CHECK-NEXT:    vld $vr1, $a1, 0 -; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 1 -; CHECK-NEXT:    vreplvei.w $vr3, $vr1, 1 -; CHECK-NEXT:    fmin.s $fa2, $fa3, $fa2 -; CHECK-NEXT:    vreplvei.w $vr3, $vr0, 0 -; CHECK-NEXT:    vreplvei.w $vr4, $vr1, 0 -; CHECK-NEXT:    fmin.s $fa3, $fa4, $fa3 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 2 -; CHECK-NEXT:    vreplvei.w $vr4, $vr1, 2 -; CHECK-NEXT:    fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 3 -; CHECK-NEXT:    fmin.s $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT:    vst $vr3, $a0, 0 +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vfmin.s $vr0, $vr0, $vr1 +; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry:    %v0 = load <4 x float>, ptr %x @@ -35,15 +21,9 @@ entry:  define void @minnum_v2f64(ptr %res, ptr %x, ptr %y) nounwind {  ; CHECK-LABEL: minnum_v2f64:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a2, 0 -; CHECK-NEXT:    vld $vr1, $a1, 0 -; CHECK-NEXT:    vreplvei.d $vr2, $vr0, 1 -; CHECK-NEXT:    vreplvei.d $vr3, $vr1, 1 -; CHECK-NEXT:    fmin.d $fa2, $fa3, $fa2 -; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT:    fmin.d $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.d $vr0, $vr2, 16 +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vfmin.d $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -57,24 +37,10 @@ entry:  define void @maxnum_v4f32(ptr %res, ptr %x, ptr %y) nounwind {  ; CHECK-LABEL: maxnum_v4f32:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a2, 0 -; CHECK-NEXT:    vld $vr1, $a1, 0 -; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 1 -; CHECK-NEXT:    vreplvei.w $vr3, $vr1, 1 -; CHECK-NEXT:    fmax.s $fa2, $fa3, $fa2 -; CHECK-NEXT:    vreplvei.w $vr3, $vr0, 0 -; CHECK-NEXT:    vreplvei.w $vr4, $vr1, 0 -; CHECK-NEXT:    fmax.s $fa3, $fa4, $fa3 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 2 -; CHECK-NEXT:    vreplvei.w $vr4, $vr1, 2 -; CHECK-NEXT:    fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT:    vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 3 -; CHECK-NEXT:    fmax.s $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT:    vst $vr3, $a0, 0 +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vfmax.s $vr0, $vr0, $vr1 +; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry:    %v0 = load <4 x float>, ptr %x @@ -87,15 +53,9 @@ entry:  define void @maxnum_v2f64(ptr %res, ptr %x, ptr %y) nounwind {  ; 
CHECK-LABEL: maxnum_v2f64:  ; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a2, 0 -; CHECK-NEXT:    vld $vr1, $a1, 0 -; CHECK-NEXT:    vreplvei.d $vr2, $vr0, 1 -; CHECK-NEXT:    vreplvei.d $vr3, $vr1, 1 -; CHECK-NEXT:    fmax.d $fa2, $fa3, $fa2 -; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT:    fmax.d $fa0, $fa1, $fa0 -; CHECK-NEXT:    vextrins.d $vr0, $vr2, 16 +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vfmax.d $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-unsupported-syncscope.err.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-unsupported-syncscope.err.ll new file mode 100644 index 0000000..d3853e2 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-unsupported-syncscope.err.ll @@ -0,0 +1,11 @@ +; RUN: not llc -mcpu=sm_100a -mtriple=nvptx64 -mattr=+ptx86 %s 2>&1 | FileCheck %s + +; Test that we get a clear error message when using an unsupported syncscope. + +; CHECK: NVPTX backend does not support syncscope "agent" +; CHECK: Supported syncscopes are: singlethread, <empty string>, block, cluster, device +define i32 @cmpxchg_unsupported_syncscope_agent(ptr %addr, i32 %cmp, i32 %new) { +  %result = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("agent") monotonic monotonic +  %value = extractvalue { i32, i1 } %result, 0 +  ret i32 %value +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vloxei-rv64.ll new file mode 100644 index 0000000..5cb55f1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vloxei-rv64.ll @@ -0,0 +1,1341 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -global-isel -verify-machineinstrs \ +; RUN:   < %s | FileCheck %s + +; The intrinsics are not supported with RV32. 
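+; This file covers the i64-indexed (vloxei64.v) forms of the vloxei intrinsics,
+; in both unmasked and masked variants, across integer and floating-point
+; element types; the XLen-parameterised forms are tested in vloxei.ll, which
+; substitutes iXLen via sed in its RUN lines.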
+ +declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vloxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i8> @intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i64( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vloxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i8> @intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i64( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vloxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    
i64 %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i8> @intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i64( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vloxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i8> @intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i64( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i16> @intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i64(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale 
x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i64( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64( +    <vscale x 2 x i16> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i16> @intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i64(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i64( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i16> @intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i64(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i64( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, 
e16, m2, ta, ma +; CHECK-NEXT:    vloxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i16> @intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i64(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i64( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vloxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i32> @intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i64(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i64( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i64( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i32> @intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i64(<vscale x 2 x i32> %0, ptr %1, 
<vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i64( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vloxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64( +    <vscale x 4 x i32> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i32> @intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i64(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i32> @intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i64(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i64( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64( +  
<vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i64> @intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64( +    <vscale x 2 x i64> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i64> @intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i64( 
+  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i64> @intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i64> @intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64( +    <vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vloxei.mask.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x half> @intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i64(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.mask.nxv1f16.nxv1i64( +    <vscale x 1 x half> %0, +    ptr %1, +  
  <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x half> @intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i64(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i64( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.mask.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x half> @intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i64(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vloxei.mask.nxv4f16.nxv4i64( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vloxei64.v v16, (a0), v8 +; 
CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x half> @intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i64(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i64( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vloxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x float> @intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i64(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i64( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i64( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x float> @intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i64(<vscale x 2 x float> %0, ptr %1, <vscale x 2 
x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i64( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vloxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64( +    <vscale x 4 x float> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x float> @intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i64(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i64( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64( +    <vscale x 8 x float> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x float> @intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i64(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i64( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 1 x 
double> @llvm.riscv.vloxei.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i64( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x double> @intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i64( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64( +    <vscale x 2 x double> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x double> @intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i64( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64( +    <vscale x 4 x double> poison, +    ptr %0, +    
<vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x double> @intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i64( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vloxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x double> @intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i64( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vloxei.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vloxei.ll new file mode 100644 index 0000000..fafd45b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vloxei.ll @@ -0,0 +1,5100 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> 
@llvm.riscv.vloxei.nxv1i8.nxv1i32( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i32( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i32( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vloxei32.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    
vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i32( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vloxei32.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i32( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vloxei32.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32( +    <vscale x 16 x i8> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vloxei.mask.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vloxei_mask_v_nxv16i8_nxv16i8_nxv16i32(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.mask.nxv16i8.nxv16i32( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32(ptr %0, <vscale x 1 x i32> 
%1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i32(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i32( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32( +    <vscale x 2 x i16> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i32(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i32( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei32.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i32( +  <vscale x 4 x 
i16>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i32(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i32( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vloxei32.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i32(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i32( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vloxei32.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32( +    <vscale x 16 x i16> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vloxei.mask.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vloxei_mask_v_nxv16i16_nxv16i16_nxv16i32(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> 
@llvm.riscv.vloxei.mask.nxv16i16.nxv16i32( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    
vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32( +    <vscale x 4 x i32> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32( +    <vscale x 16 x i32> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vloxei.mask.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vloxei_mask_v_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; 
CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.mask.nxv16i32.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i32( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32( +    <vscale x 2 x i64> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i32( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> 
@llvm.riscv.vloxei.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i32( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i32( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; 
CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32( +    <vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vloxei.mask.nxv1f16.nxv1i32( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i32(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.mask.nxv1f16.nxv1i32( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i32(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i32( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei32.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.mask.nxv4f16.nxv4i32( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i32(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i32> %2, <vscale 
x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vloxei.mask.nxv4f16.nxv4i32( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vloxei32.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i32(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i32( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vloxei32.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32( +    <vscale x 16 x half> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vloxei.mask.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vloxei_mask_v_nxv16f16_nxv16f16_nxv16i32(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vloxei.mask.nxv16f16.nxv16i32( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x half> 
%a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i32( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i32( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32( +    <vscale x 4 x 
float> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i32( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32( +    <vscale x 8 x float> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i32( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32( +    <vscale x 16 x float> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vloxei.mask.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vloxei_mask_v_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       
# %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vloxei.mask.nxv16f32.nxv16i32( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i32(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i32( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32( +    <vscale x 2 x double> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i32(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i32( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> 
@llvm.riscv.vloxei.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32( +    <vscale x 4 x double> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i32(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i32( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vloxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i32(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i32( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:  
  vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i16( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i16( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { 
+; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i16( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vloxei16.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i16( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vloxei16.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16( +    <vscale x 16 x i8> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vloxei.mask.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vloxei_mask_v_nxv16i8_nxv16i8_nxv16i16(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.mask.nxv16i8.nxv16i16( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x 
i16>, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16(ptr %0, <vscale x 32 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vloxei16.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16( +    <vscale x 32 x i8> poison, +    ptr %0, +    <vscale x 32 x i16> %1, +    iXLen %2) + +  ret <vscale x 32 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vloxei.mask.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vloxei_mask_v_nxv32i8_nxv32i8_nxv32i16(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.mask.nxv32i8.nxv32i16( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16( +    <vscale x 2 x i16> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x 
i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i16( +    
<vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16( +    <vscale x 16 x i16> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vloxei.mask.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vloxei_mask_v_nxv16i16_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.mask.nxv16i16.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i16(ptr %0, <vscale x 32 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16( +    <vscale x 32 x i16> poison, +    ptr %0, +    <vscale x 32 x i16> %1, +    iXLen %2) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vloxei.mask.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i16> @intrinsic_vloxei_mask_v_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.mask.nxv32i16.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, 
mf2, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i16( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i16( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16( +    <vscale x 4 x i32> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i32> 
@intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i16( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i16( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16( +    <vscale x 16 x i32> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vloxei.mask.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vloxei_mask_v_nxv16i32_nxv16i32_nxv16i16(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> 
@llvm.riscv.vloxei.mask.nxv16i32.nxv16i16( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i16(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i16( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16( +    <vscale x 2 x i64> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i16(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i16( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i16(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i16( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i16(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i16( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16( +    <vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> 
@llvm.riscv.vloxei.mask.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.mask.nxv1f16.nxv1i16( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i16( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.mask.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> 
@llvm.riscv.vloxei.mask.nxv4f16.nxv4i16( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i16( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16( +    <vscale x 16 x half> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vloxei.mask.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vloxei_mask_v_nxv16f16_nxv16f16_nxv16i16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vloxei.mask.nxv16f16.nxv16i16( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i16(ptr %0, <vscale x 32 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # 
%entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16( +    <vscale x 32 x half> poison, +    ptr %0, +    <vscale x 32 x i16> %1, +    iXLen %2) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vloxei.mask.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x half> @intrinsic_vloxei_mask_v_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vloxei.mask.nxv32f16.nxv32i16( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i16(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i16( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, 
+  iXLen); + +define <vscale x 2 x float> @intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i16(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i16( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16( +    <vscale x 4 x float> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i16(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i16( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16( +    <vscale x 8 x float> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i16(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v12, v0.t +; 
CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i16( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16( +    <vscale x 16 x float> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vloxei.mask.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vloxei_mask_v_nxv16f32_nxv16f32_nxv16i16(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vloxei.mask.nxv16f32.nxv16i16( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i16(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i16( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + 
+define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16( +    <vscale x 2 x double> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i16(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i16( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16( +    <vscale x 4 x double> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i16(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i16( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    
vloxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i16(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i16( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.mask.nxv1i8.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # 
%bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.mask.nxv2i8.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.mask.nxv4i8.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.mask.nxv8i8.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # 
%entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8( +    <vscale x 16 x i8> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vloxei.mask.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vloxei_mask_v_nxv16i8_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.mask.nxv16i8.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i8(ptr %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8( +    <vscale x 32 x i8> poison, +    ptr %0, +    <vscale x 32 x i8> %1, +    iXLen %2) + +  ret <vscale x 32 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vloxei.mask.nxv32i8.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vloxei_mask_v_nxv32i8_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.mask.nxv32i8.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i8> %a +} + +declare <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  iXLen); + +define <vscale x 64 x i8> @intrinsic_vloxei_v_nxv64i8_nxv64i8_nxv64i8(ptr %0, <vscale x 64 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8( +    <vscale x 64 x i8> poison, +    ptr %0, +    <vscale x 64 x i8> %1, +    iXLen %2) + +  ret <vscale x 64 x i8> %a +} + +declare <vscale x 64 x i8> @llvm.riscv.vloxei.mask.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  <vscale x 64 x i1>, +  iXLen, +  iXLen); + +define <vscale x 64 x i8> @intrinsic_vloxei_mask_v_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i8> %2, <vscale 
x 64 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 64 x i8> @llvm.riscv.vloxei.mask.nxv64i8.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    <vscale x 64 x i8> %2, +    <vscale x 64 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 64 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.mask.nxv1i16.nxv1i8( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8( +    <vscale x 2 x i16> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.mask.nxv2i16.nxv2i8( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  
<vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.mask.nxv4i16.nxv4i8( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.mask.nxv8i16.nxv8i8( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8( +  
  <vscale x 16 x i16> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vloxei.mask.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vloxei_mask_v_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.mask.nxv16i16.nxv16i8( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8(ptr %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8( +    <vscale x 32 x i16> poison, +    ptr %0, +    <vscale x 32 x i8> %1, +    iXLen %2) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vloxei.mask.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i16> @intrinsic_vloxei_mask_v_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.mask.nxv32i16.nxv32i8( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i8( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; 
CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.mask.nxv1i32.nxv1i8( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.mask.nxv2i32.nxv2i8( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8( +    <vscale x 4 x i32> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i8( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  
ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.mask.nxv8i32.nxv8i8( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8( +    <vscale x 16 x i32> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vloxei.mask.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vloxei_mask_v_nxv16i32_nxv16i32_nxv16i8(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.mask.nxv16i32.nxv16i8( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> 
@llvm.riscv.vloxei.nxv1i64.nxv1i8( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.mask.nxv1i64.nxv1i8( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8( +    <vscale x 2 x i64> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.mask.nxv2i64.nxv2i8( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) 
nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.mask.nxv4i64.nxv4i8( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.mask.nxv8i64.nxv8i8( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8( +    <vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vloxei.mask.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i8(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vloxei.mask.nxv1f16.nxv1i8( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> 
@llvm.riscv.vloxei.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i8(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vloxei.mask.nxv2f16.nxv2i8( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vloxei.mask.nxv4f16.nxv4i8( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i8(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f16_nxv4f16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vloxei.mask.nxv4f16.nxv4i8( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> 
@llvm.riscv.vloxei.nxv8f16.nxv8i8( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i8(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vloxei.mask.nxv8f16.nxv8i8( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8( +    <vscale x 16 x half> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vloxei.mask.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vloxei_mask_v_nxv16f16_nxv16f16_nxv16i8(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vloxei.mask.nxv16f16.nxv16i8( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8(ptr %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8( +    <vscale x 32 x half> poison, +    ptr %0, +    <vscale x 32 x i8> %1, +    iXLen %2) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vloxei.mask.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x half> 
@intrinsic_vloxei_mask_v_nxv32f16_nxv32f16_nxv32i8(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vloxei.mask.nxv32f16.nxv32i8( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i8(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vloxei.mask.nxv1f32.nxv1i8( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i8( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i8(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vloxei.mask.nxv2f32.nxv2i8( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    
<vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8( +    <vscale x 4 x float> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i8(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vloxei.mask.nxv4f32.nxv4i8( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8( +    <vscale x 8 x float> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i8(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vloxei.mask.nxv8f32.nxv8i8( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # 
%bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8( +    <vscale x 16 x float> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vloxei.mask.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vloxei_mask_v_nxv16f32_nxv16f32_nxv16i8(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vloxei.mask.nxv16f32.nxv16i8( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vloxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i8(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vloxei.mask.nxv1f64.nxv1i8( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8( +    <vscale x 2 x double> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 
2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i8(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vloxei.mask.nxv2f64.nxv2i8( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8( +    <vscale x 4 x double> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i8(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vloxei.mask.nxv4f64.nxv4i8( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vloxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i8(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vloxei_mask_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:      
 # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vloxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vloxei.mask.nxv8f64.nxv8i8( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vluxei-rv64.ll new file mode 100644 index 0000000..916af25 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vluxei-rv64.ll @@ -0,0 +1,1341 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -global-isel -verify-machineinstrs \ +; RUN:   < %s | FileCheck %s + +; The intrinsics are not supported with RV32. + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vluxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i8> @intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i64( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vluxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i8> @intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v10, 
v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i64( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vluxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i8> @intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i64( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vluxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i8> @intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i64( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, 
mf4, ta, ma +; CHECK-NEXT:    vluxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i16> @intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i64(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i64( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64( +    <vscale x 2 x i16> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i16> @intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i64(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i64( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i16> @intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i64(<vscale x 4 x i16> %0, ptr %1, <vscale 
x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i64( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i16> @intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i64(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i64( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i32> @intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i64(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i64( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64( +  <vscale x 2 
x i32>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i64( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i32> @intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i64(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i64( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64( +    <vscale x 4 x i32> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i32> @intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i64(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret 
<vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i32> @intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i64(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i64( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x i64> @intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64( +    <vscale x 2 x i64> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.mask.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x i64> @intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> 
@llvm.riscv.vluxei.mask.nxv2i64.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x i64> @intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x i64> @intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vluxei64.v v9, (a0), v8 +; CHECK-NEXT:    
vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64( +    <vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x half> @intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i64(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i64( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x half> @intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i64(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i64( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x half> @intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i64(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, 
i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i64( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x half> @intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i64(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i64( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei64.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x float> @intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i64(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i64( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64( +  
<vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei64.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i64( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x float> @intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i64(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i64( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei64.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64( +    <vscale x 4 x float> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x float> @intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i64(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i64( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei64.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64( +    <vscale x 8 x float> 
poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x float> @intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i64(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i64( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i64(ptr %0, <vscale x 1 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i64> %1, +    i64 %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vluxei.mask.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64, +  i64); + +define <vscale x 1 x double> @intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vluxei.mask.nxv1f64.nxv1i64( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i64(ptr %0, <vscale x 2 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64( +    <vscale x 2 x double> poison, +    ptr %0, +    <vscale x 2 x i64> %1, +    i64 %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64, +  i64); + +define <vscale x 2 x double> @intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, 
m2, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i64( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i64(ptr %0, <vscale x 4 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64( +    <vscale x 4 x double> poison, +    ptr %0, +    <vscale x 4 x i64> %1, +    i64 %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64, +  i64); + +define <vscale x 4 x double> @intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i64( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i64(ptr %0, <vscale x 8 x i64> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei64.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i64> %1, +    i64 %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64, +  i64); + +define <vscale x 8 x double> @intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i64( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4, i64 1) + +  ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vluxei.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vluxei.ll new file mode 100644 index 0000000..8dd32a1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vluxei.ll @@ -0,0 +1,5100 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i32( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i32( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32(ptr %0, <vscale x 
4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vluxei32.v v10, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i32( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vluxei32.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i32( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vluxei32.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32( +    <vscale x 16 x i8> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vluxei.mask.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 
x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vluxei_mask_v_nxv16i8_nxv16i8_nxv16i32(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.mask.nxv16i8.nxv16i32( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i32(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i32( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32( +    <vscale x 2 x i16> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i32(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i32( +    <vscale x 2 x 
i16> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei32.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i32( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i32(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i32( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei32.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i32(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i32( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; 
CHECK-NEXT:    vluxei32.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32( +    <vscale x 16 x i16> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vluxei.mask.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vluxei_mask_v_nxv16i16_nxv16i16_nxv16i32(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.mask.nxv16i16.nxv16i32( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i32> %2, 
<vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32( +    <vscale x 4 x i32> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  
iXLen); + +define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32( +    <vscale x 16 x i32> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vluxei.mask.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vluxei_mask_v_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.mask.nxv16i32.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i32( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32( +    <vscale x 2 x i64> poison, +    ptr %0, +    
<vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.mask.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.mask.nxv2i64.nxv2i32( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i32( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i32( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32( +    <vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i32( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i32(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i32( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i32(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i32( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32( +  <vscale x 4 x half>, 
+  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei32.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i32( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i32(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i32( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei32.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i32(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i32( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vluxei32.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32( +    <vscale x 16 x 
half> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vluxei.mask.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vluxei_mask_v_nxv16f16_nxv16f16_nxv16i32(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vluxei.mask.nxv16f16.nxv16i32( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i32( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry 
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i32( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32( +    <vscale x 4 x float> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i32( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32( +    <vscale x 8 x float> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i32( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define <vscale x 16 x float> 
@intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i32(ptr %0, <vscale x 16 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32( +    <vscale x 16 x float> poison, +    ptr %0, +    <vscale x 16 x i32> %1, +    iXLen %2) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vluxei.mask.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vluxei_mask_v_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vluxei.mask.nxv16f32.nxv16i32( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32(ptr %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei32.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i32> %1, +    iXLen %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vluxei.mask.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i32(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vluxei.mask.nxv1f64.nxv1i32( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32(ptr %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32( +    <vscale x 2 x 
double> poison, +    ptr %0, +    <vscale x 2 x i32> %1, +    iXLen %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i32(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i32( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32(ptr %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32( +    <vscale x 4 x double> poison, +    ptr %0, +    <vscale x 4 x i32> %1, +    iXLen %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i32(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i32( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32(ptr %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i32> %1, +    iXLen %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i32(<vscale x 8 
x double> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i32( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i16( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i16( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> 
@llvm.riscv.vluxei.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i16( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vluxei16.v v10, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i16( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vluxei16.v v12, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16( +    <vscale x 16 x i8> poison, +    ptr %0, +    <vscale 
x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vluxei.mask.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vluxei_mask_v_nxv16i8_nxv16i8_nxv16i16(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.mask.nxv16i8.nxv16i16( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16(ptr %0, <vscale x 32 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vluxei16.v v16, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16( +    <vscale x 32 x i8> poison, +    ptr %0, +    <vscale x 32 x i16> %1, +    iXLen %2) + +  ret <vscale x 32 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vluxei.mask.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vluxei_mask_v_nxv32i8_nxv32i8_nxv32i16(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.mask.nxv32i8.nxv32i16( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; 
CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16( +    <vscale x 2 x i16> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i16: +; 
CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16( +    <vscale x 16 x i16> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vluxei.mask.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vluxei_mask_v_nxv16i16_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.mask.nxv16i16.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i16(ptr %0, <vscale x 32 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16( +    <vscale x 32 x i16> poison, +    ptr %0, +    <vscale x 32 x i16> %1, +    iXLen %2) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vluxei.mask.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i16> 
@intrinsic_vluxei_mask_v_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.mask.nxv32i16.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i16( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i16( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, 
+    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16( +    <vscale x 4 x i32> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i16( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i16( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 
1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16( +    <vscale x 16 x i32> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vluxei.mask.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vluxei_mask_v_nxv16i32_nxv16i32_nxv16i16(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.mask.nxv16i32.nxv16i16( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i16(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i16( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16( +    <vscale x 2 x i64> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.mask.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale 
x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i16(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.mask.nxv2i64.nxv2i16( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i16(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i16( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i16(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret 
+entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i16( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16( +    <vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i16( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i16( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; 
CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i16( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i16( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16( +    <vscale x 16 x half> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vluxei.mask.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vluxei_mask_v_nxv16f16_nxv16f16_nxv16i16(<vscale x 
16 x half> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vluxei.mask.nxv16f16.nxv16i16( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i16(ptr %0, <vscale x 32 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16( +    <vscale x 32 x half> poison, +    ptr %0, +    <vscale x 32 x i16> %1, +    iXLen %2) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vluxei.mask.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x half> @intrinsic_vluxei_mask_v_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vluxei.mask.nxv32f16.nxv32i16( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i16(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i16( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen 
%4, iXLen 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i16(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i16( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16( +    <vscale x 4 x float> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i16(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i16( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma 
+; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16( +    <vscale x 8 x float> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i16(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i16( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16(ptr %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16( +    <vscale x 16 x float> poison, +    ptr %0, +    <vscale x 16 x i16> %1, +    iXLen %2) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vluxei.mask.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vluxei_mask_v_nxv16f32_nxv16f32_nxv16i16(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vluxei.mask.nxv16f32.nxv16i16( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16(ptr %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei16.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i16> %1, +    iXLen %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> 
@llvm.riscv.vluxei.mask.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i16(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vluxei.mask.nxv1f64.nxv1i16( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16(ptr %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16( +    <vscale x 2 x double> poison, +    ptr %0, +    <vscale x 2 x i16> %1, +    iXLen %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i16(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i16( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16(ptr %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16( +    <vscale x 4 x double> poison, +    ptr %0, +    <vscale x 4 x i16> %1, +    iXLen %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i16(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i16( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16(ptr %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i16> %1, +    iXLen %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i16(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i16( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8( +    <vscale x 1 x i8> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i8> @intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.mask.nxv1i8.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, 
+  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8( +    <vscale x 2 x i8> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i8> @intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.mask.nxv2i8.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8( +    <vscale x 4 x i8> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i8> @intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.mask.nxv4i8.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8( +    <vscale x 8 x i8> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  
iXLen); + +define <vscale x 8 x i8> @intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.mask.nxv8i8.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8( +    <vscale x 16 x i8> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vluxei.mask.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i8> @intrinsic_vluxei_mask_v_nxv16i8_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.mask.nxv16i8.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i8(ptr %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8( +    <vscale x 32 x i8> poison, +    ptr %0, +    <vscale x 32 x i8> %1, +    iXLen %2) + +  ret <vscale x 32 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vluxei.mask.nxv32i8.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i8> @intrinsic_vluxei_mask_v_nxv32i8_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.mask.nxv32i8.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i8> %a +} + 
+declare <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  iXLen); + +define <vscale x 64 x i8> @intrinsic_vluxei_v_nxv64i8_nxv64i8_nxv64i8(ptr %0, <vscale x 64 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v8 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8( +    <vscale x 64 x i8> poison, +    ptr %0, +    <vscale x 64 x i8> %1, +    iXLen %2) + +  ret <vscale x 64 x i8> %a +} + +declare <vscale x 64 x i8> @llvm.riscv.vluxei.mask.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  <vscale x 64 x i1>, +  iXLen, +  iXLen); + +define <vscale x 64 x i8> @intrinsic_vluxei_mask_v_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 64 x i8> @llvm.riscv.vluxei.mask.nxv64i8.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    <vscale x 64 x i8> %2, +    <vscale x 64 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 64 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8( +    <vscale x 1 x i16> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i16> @intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.mask.nxv1i16.nxv1i8( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8( +    <vscale x 2 x i16> poison, +    ptr 
%0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i16> @intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.mask.nxv2i16.nxv2i8( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8( +    <vscale x 4 x i16> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i16> @intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.mask.nxv4i16.nxv4i8( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8( +    <vscale x 8 x i16> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i16> @intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    
vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.mask.nxv8i16.nxv8i8( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8( +    <vscale x 16 x i16> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vluxei.mask.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i16> @intrinsic_vluxei_mask_v_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.mask.nxv16i16.nxv16i8( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8(ptr %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8( +    <vscale x 32 x i16> poison, +    ptr %0, +    <vscale x 32 x i8> %1, +    iXLen %2) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vluxei.mask.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x i16> @intrinsic_vluxei_mask_v_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.mask.nxv32i16.nxv32i8( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8( 
+  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8( +    <vscale x 1 x i32> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i8( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i32> @intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.mask.nxv1i32.nxv1i8( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8( +    <vscale x 2 x i32> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i32> @intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.mask.nxv2i32.nxv2i8( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8( +    <vscale x 4 x i32> poison, +    
ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i32> @intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i8( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8( +    <vscale x 8 x i32> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i32> @intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.mask.nxv8i32.nxv8i8( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8( +    <vscale x 16 x i32> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vluxei.mask.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x i32> @intrinsic_vluxei_mask_v_nxv16i32_nxv16i32_nxv16i8(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vluxei_mask_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.mask.nxv16i32.nxv16i8( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8( +    <vscale x 1 x i64> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x i64> @intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.mask.nxv1i64.nxv1i8( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8( +    <vscale x 2 x i64> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vluxei.mask.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x i64> @intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.mask.nxv2i64.nxv2i8( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  
ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8( +    <vscale x 4 x i64> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x i64> @intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.mask.nxv4i64.nxv4i8( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8( +    <vscale x 8 x i64> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x i64> @intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.mask.nxv8i64.nxv8i8( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8( +    
<vscale x 1 x half> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x half> @intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i8(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x half> @llvm.riscv.vluxei.mask.nxv1f16.nxv1i8( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8( +    <vscale x 2 x half> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x half> @intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i8(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x half> @llvm.riscv.vluxei.mask.nxv2f16.nxv2i8( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8( +    <vscale x 4 x half> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i8( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x half> @intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i8(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f16_nxv4f16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; 
CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x half> @llvm.riscv.vluxei.mask.nxv4f16.nxv4i8( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8( +    <vscale x 8 x half> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x half> @intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i8(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x half> @llvm.riscv.vluxei.mask.nxv8f16.nxv8i8( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8( +    <vscale x 16 x half> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vluxei.mask.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x half> @intrinsic_vluxei_mask_v_nxv16f16_nxv16f16_nxv16i8(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x half> @llvm.riscv.vluxei.mask.nxv16f16.nxv16i8( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x half> %a +} + +declare <vscale x 32 x half> 
@llvm.riscv.vluxei.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8(ptr %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv4r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8( +    <vscale x 32 x half> poison, +    ptr %0, +    <vscale x 32 x i8> %1, +    iXLen %2) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vluxei.mask.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen, +  iXLen); + +define <vscale x 32 x half> @intrinsic_vluxei_mask_v_nxv32f16_nxv32f16_nxv32i8(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 32 x half> @llvm.riscv.vluxei.mask.nxv32f16.nxv32i8( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 32 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv1r.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8( +    <vscale x 1 x float> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x float> @intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i8(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x float> @llvm.riscv.vluxei.mask.nxv1f32.nxv1i8( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; 
CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8( +    <vscale x 2 x float> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i8( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x float> @intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i8(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x float> @llvm.riscv.vluxei.mask.nxv2f32.nxv2i8( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8( +    <vscale x 4 x float> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x float> @intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i8(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x float> @llvm.riscv.vluxei.mask.nxv4f32.nxv4i8( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8( +    <vscale x 8 x float> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x float> 
@intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i8(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x float> @llvm.riscv.vluxei.mask.nxv8f32.nxv8i8( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8(ptr %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv2r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8( +    <vscale x 16 x float> poison, +    ptr %0, +    <vscale x 16 x i8> %1, +    iXLen %2) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vluxei.mask.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen, +  iXLen); + +define <vscale x 16 x float> @intrinsic_vluxei_mask_v_nxv16f32_nxv16f32_nxv16i8(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 16 x float> @llvm.riscv.vluxei.mask.nxv16f32.nxv16i8( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8(ptr %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vluxei8.v v9, (a0), v8 +; CHECK-NEXT:    vmv.v.v v8, v9 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8( +    <vscale x 1 x double> poison, +    ptr %0, +    <vscale x 1 x i8> %1, +    iXLen %2) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vluxei.mask.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen, +  iXLen); + +define <vscale x 1 x double> @intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i8(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x double> 
@llvm.riscv.vluxei.mask.nxv1f64.nxv1i8( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8(ptr %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v10, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8( +    <vscale x 2 x double> poison, +    ptr %0, +    <vscale x 2 x i8> %1, +    iXLen %2) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen, +  iXLen); + +define <vscale x 2 x double> @intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i8(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 2 x double> @llvm.riscv.vluxei.mask.nxv2f64.nxv2i8( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8(ptr %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v12, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8( +    <vscale x 4 x double> poison, +    ptr %0, +    <vscale x 4 x i8> %1, +    iXLen %2) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen, +  iXLen); + +define <vscale x 4 x double> @intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i8(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 4 x double> @llvm.riscv.vluxei.mask.nxv4f64.nxv4i8( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define <vscale x 8 x double> 
@intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8(ptr %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT:    vmv1r.v v16, v8 +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vluxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8( +    <vscale x 8 x double> poison, +    ptr %0, +    <vscale x 8 x i8> %1, +    iXLen %2) + +  ret <vscale x 8 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen, +  iXLen); + +define <vscale x 8 x double> @intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i8(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vluxei_mask_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT:    vluxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 8 x double> @llvm.riscv.vluxei.mask.nxv8f64.nxv8i8( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4, iXLen 1) + +  ret <vscale x 8 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsoxei-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsoxei-rv64.ll new file mode 100644 index 0000000..4963d91 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsoxei-rv64.ll @@ -0,0 +1,1293 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -global-isel -verify-machineinstrs \ +; RUN:   < %s | FileCheck %s + +; The intrinsics are not supported with RV32. 
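+
+; A minimal sketch, for orientation only (not part of the autogenerated checks;
+; operand names are illustrative), of the pattern each unmasked/masked pair below
+; exercises, using the first intrinsic declared in this file:
+;
+;   call void @llvm.riscv.vsoxei.nxv1i8.nxv1i64(<vscale x 1 x i8> %val, ptr %base,
+;                                               <vscale x 1 x i64> %idx, i64 %vl)
+;
+; is expected to select to a single "vsetvli zero, a1, e8, mf8, ta, ma" followed
+; by "vsoxei64.v v8, (a0), v9"; the masked variant passes an extra
+; <vscale x 1 x i1> operand in v0 and the store gains a trailing ", v0.t".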
+ +declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i8.nxv1i64( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i8.nxv1i64( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i8.nxv2i64( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i64( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i8.nxv4i64( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i64( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i8.nxv8i64( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i64( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i64(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i16.nxv1i64( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i64(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i64( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i64(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i16.nxv2i64( +    <vscale x 2 x 
i16> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i64(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i16.nxv2i64( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i64(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i16.nxv4i64( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i64(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i64( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i64(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i16.nxv8i64( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i64(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i64( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  
i64); + +define void @intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i64(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i32.nxv1i64( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i64(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i64( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i32.nxv2i64( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i64(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i32.nxv2i64( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i32.nxv2i64( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i64(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i32.nxv2i64( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i64(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i32.nxv4i64( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i64(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry 
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i64(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i32.nxv8i64( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i64(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i32.nxv8i64( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i64.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i64.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i64.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 
%3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i64.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i64.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i64.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void 
@intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i64(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f16.nxv1i64( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i64(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i64( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i64(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f16.nxv2i64( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i64(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i64( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i64(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f16.nxv4i64( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i64(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; 
CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i64( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i64(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f16.nxv8i64( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i64(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i64( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i64(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f32.nxv1i64( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i64(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i64( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f32.nxv2i64( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i64(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f32.nxv2i64( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 
2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i64( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i64(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i64( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i64(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f32.nxv4i64( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i64(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i64( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i64(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f32.nxv8i64( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i64(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i64( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  i64); + 
+define void @intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f64.nxv1i64( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i64( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f64.nxv2i64( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i64( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f64.nxv4i64( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: 
intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i64( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f64.nxv8i64( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f64.nxv8i64( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsoxei.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsoxei.ll new file mode 100644 index 0000000..7ea2e17 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsoxei.ll @@ -0,0 +1,4881 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i8.nxv1i32( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsoxei.mask.nxv1i8.nxv1i32( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i8.nxv2i32( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i32( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i8.nxv4i32( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i32( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i8.nxv8i32( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); 
+ +define void @intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i32( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i8.nxv16i32( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i8_nxv16i8_nxv16i32(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i8.nxv16i32( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i16.nxv1i32( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i32(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i32( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i16.nxv2i32( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i32(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i16.nxv2i32( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i16.nxv4i32( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i16.nxv4i32( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i32( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i32(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i32( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i16.nxv8i32( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i32(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret 
+entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i32( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16i16_nxv16i16_nxv16i32(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i16.nxv16i32( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i16_nxv16i16_nxv16i32(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i16.nxv16i32( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i32.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i32.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsoxei.mask.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i32.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i32.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i32.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i32.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void 
@intrinsic_vsoxei_v_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i32.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i32.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i64.nxv1i32( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i32( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i64.nxv2i32( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i32( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i64.nxv4i32( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i32( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i64.nxv8i32( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i32( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f16.nxv1i32( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsoxei.nxv1f16.nxv1i32( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i32( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i32(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i32( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f16.nxv2i32( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i32(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i32( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f16.nxv4i32( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f16.nxv4i32( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i32( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i32(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i32( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare 
void @llvm.riscv.vsoxei.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f16.nxv8i32( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i32(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i32( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16f16_nxv16f16_nxv16i32(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16f16.nxv16i32( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16f16_nxv16f16_nxv16i32(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16f16.nxv16i32( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f32.nxv1i32( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i32(<vscale 
x 1 x float> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i32( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f32.nxv2i32( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i32( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f32.nxv4i32( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i32( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; 
CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f32.nxv8i32( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i32( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16f32.nxv16i32( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16f32.nxv16i32( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f64.nxv1i32( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i32(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  
call void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i32( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f64.nxv2i32( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i32(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i32( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f64.nxv4i32( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i32(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i32( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f64.nxv8i32( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsoxei.mask.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i32(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f64.nxv8i32( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i8.nxv1i16( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i8.nxv1i16( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i8.nxv2i16( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i16( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x i8> %0, ptr %1, <vscale 
x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i8.nxv4i16( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i16( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i8.nxv8i16( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i16( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i8.nxv16i16( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i8_nxv16i8_nxv16i16(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), 
v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i8.nxv16i16( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv32i8.nxv32i16( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv32i8_nxv32i8_nxv32i16(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv32i8.nxv32i16( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i16.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i16.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsoxei.mask.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i16.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i16.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i16.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void 
@intrinsic_vsoxei_v_nxv16i16_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i16.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i16_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i16.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv32i16.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv32i16.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i32.nxv1i16( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i16( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i32.nxv2i16( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i32.nxv2i16( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i32.nxv4i16( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i16( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsoxei.nxv8i32.nxv8i16( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i32.nxv8i16( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16i32_nxv16i32_nxv16i16(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i32.nxv16i16( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i32_nxv16i32_nxv16i16(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i32.nxv16i16( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i64.nxv1i16( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i16(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i16( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret 
void +} + +declare void @llvm.riscv.vsoxei.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i64.nxv2i16( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i16(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i16( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i64.nxv4i16( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i16(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i16( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i64.nxv8i16( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i16(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x 
i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i16( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f16.nxv1i16( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i16( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f16.nxv2i16( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i16( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; 
CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f16.nxv4i16( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i16( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f16.nxv8i16( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i16( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16f16_nxv16f16_nxv16i16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16f16.nxv16i16( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16f16_nxv16f16_nxv16i16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16f16.nxv16i16( +    <vscale x 16 x half> 
%0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv32f16.nxv32i16( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv32f16.nxv32i16( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f32.nxv1i16( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i16(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i16( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f32.nxv2i16( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  
<vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i16(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i16( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f32.nxv4i16( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i16(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i16( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f32.nxv8i16( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i16(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i16( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16f32_nxv16f32_nxv16i16(<vscale x 16 x float> %0, ptr %1, <vscale 
x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16f32.nxv16i16( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16f32_nxv16f32_nxv16i16(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16f32.nxv16i16( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f64.nxv1i16( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i16(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i16( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f64.nxv2i16( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i16(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; 
CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i16( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f64.nxv4i16( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i16(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i16( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f64.nxv8i16( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i16(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f64.nxv8i16( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i8.nxv1i8( +    <vscale x 1 x i8> %0, + 
   ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i8.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i8.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i8.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i8.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i8.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x 
i8> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i8.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i8.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16i8_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i8.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i8_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i8.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv32i8.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv32i8_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv32i8.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv32i8.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv32i8_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v 
v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv32i8.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv64i8.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    <vscale x 64 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  <vscale x 64 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv64i8.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    <vscale x 64 x i8> %2, +    <vscale x 64 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i16.nxv1i8( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i16.nxv1i8( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i16.nxv2i8( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsoxei.mask.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i16.nxv2i8( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i16.nxv4i8( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i16.nxv4i8( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i16.nxv8i8( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i16.nxv8i8( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i16> %0, ptr %1, <vscale x 
16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i16.nxv16i8( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i16.nxv16i8( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv32i16.nxv32i8( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv32i16.nxv32i8( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i32.nxv1i8( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i32.nxv1i8( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i8( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma 
+; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i32.nxv1i8( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i32.nxv2i8( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i32.nxv2i8( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i32.nxv4i8( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i8( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i32.nxv8i8( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsoxei.mask.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i32.nxv8i8( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16i32.nxv16i8( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16i32_nxv16i32_nxv16i8(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16i32.nxv16i8( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1i64.nxv1i8( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1i64.nxv1i8( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2 x 
i64> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2i64.nxv2i8( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2i64.nxv2i8( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4i64.nxv4i8( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4i64.nxv4i8( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8i64.nxv8i8( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, 
(a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8i64.nxv8i8( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f16.nxv1i8( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i8(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f16.nxv1i8( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f16.nxv2i8( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i8(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f16.nxv2i8( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f16.nxv4i8( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f16_nxv4f16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f16.nxv4i8( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i8( +  
<vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i8(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f16_nxv4f16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f16.nxv4i8( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f16.nxv8i8( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i8(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f16.nxv8i8( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16f16_nxv16f16_nxv16i8(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16f16.nxv16i8( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16f16_nxv16f16_nxv16i8(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16f16.nxv16i8( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv32f16_nxv32f16_nxv32i8(<vscale x 32 x half> %0, ptr %1, 
<vscale x 32 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv32f16.nxv32i8( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv32f16_nxv32f16_nxv32i8(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv32f16.nxv32i8( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f32.nxv1i8( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i8(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f32.nxv1i8( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f32.nxv2i8( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f32.nxv2i8( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i8( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i8(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma 
+; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f32.nxv2i8( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f32.nxv4i8( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i8(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f32.nxv4i8( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f32.nxv8i8( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i8(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f32.nxv8i8( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv16f32_nxv16f32_nxv16i8(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv16f32.nxv16i8( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret 
void +} + +declare void @llvm.riscv.vsoxei.mask.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv16f32_nxv16f32_nxv16i8(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv16f32.nxv16i8( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv1f64.nxv1i8( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i8(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv1f64.nxv1i8( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv2f64.nxv2i8( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i8(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv2f64.nxv2i8( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void 
@intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv4f64.nxv4i8( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i8(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv4f64.nxv4i8( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsoxei.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.nxv8f64.nxv8i8( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i8(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsoxei_mask_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsoxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsoxei.mask.nxv8f64.nxv8i8( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsuxei-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsuxei-rv64.ll new file mode 100644 index 0000000..9bd272a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsuxei-rv64.ll @@ -0,0 +1,1310 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -global-isel -verify-machineinstrs \ +; RUN:   < %s | FileCheck %s + +; The intrinsics are not supported with RV32. 
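The new vsuxei-rv64.ll file added here follows the same autogenerated pattern as the vsoxei tests above: for each element type and index type there is a declare/define pair (plus a masked variant) whose CHECK lines pin the expected vsetvli + vsuxei64.v lowering under GlobalISel. As a minimal sketch of how such a file can be exercised locally, assuming an llvm-project checkout with llc, FileCheck, and utils/update_llc_test_checks.py from a local build on PATH (the TEST variable is only for illustration):

    # Reproduce the RUN line by hand; the flags are copied from the RUN line above.
    TEST=llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsuxei-rv64.ll
    llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -global-isel -verify-machineinstrs \
      < "$TEST" | FileCheck "$TEST"

    # Regenerate the autogenerated CHECK lines rather than editing them by hand
    # (--llc-binary may be omitted if the intended llc is already first on PATH).
    llvm/utils/update_llc_test_checks.py --llc-binary "$(command -v llc)" "$TEST"

Re-running the update script keeps the assertions in sync with whatever the GlobalISel path currently emits, which is why the NOTE line marks these checks as autogenerated.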
+ +declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i8.nxv1i64( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i64( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i64( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +define void @intrinsic_vsuxei_allonesmask_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_allonesmask_v_nxv1i8_nxv1i8_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i64( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> splat (i1 true), +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i8.nxv2i64( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i64( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i64( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli 
zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i8.nxv4i64( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i64( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i64( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i8.nxv8i64( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i64( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i64( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i64(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i16.nxv1i64( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i64( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i64(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i64( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    
i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i64(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i16.nxv2i64( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i64( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i64(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i64( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i64(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i16.nxv4i64( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i64( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i64(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i64( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i64(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i16.nxv8i64( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i16.nxv8i64( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i64(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i64> %2, 
<vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i16.nxv8i64( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i64(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i32.nxv1i64( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i64( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i64(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i64( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i32.nxv2i64( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i64(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i32.nxv2i64( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i64( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i64(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i64( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i64(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12 +; CHECK-NEXT:    
ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i32.nxv4i64( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i64(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i64(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i32.nxv8i64( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i64( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i64(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i64( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i64.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i64.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void 
@llvm.riscv.vsuxei.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i64.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i64.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i64.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i64.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; 
CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i64(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f16.nxv1i64( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i64( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i64(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i64( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i64(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f16.nxv2i64( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i64( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i64(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i64( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i64(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsuxei.nxv4f16.nxv4i64( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i64( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i64(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i64( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i64(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f16.nxv8i64( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i64( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i64(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i64( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i64(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f32.nxv1i64( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i64( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i64(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i64( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void 
@llvm.riscv.vsuxei.nxv2f32.nxv2i64( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i64(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f32.nxv2i64( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i64( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i64(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i64( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i64(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f32.nxv4i64( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i64( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i64(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i64( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i64(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f32.nxv8i64( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f32.nxv8i64( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i64(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 
x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f32.nxv8i64( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f64.nxv1i64( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i64>, +  <vscale x 1 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i64( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i64> %2, +    <vscale x 1 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f64.nxv2i64( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f64.nxv2i64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i64>, +  <vscale x 2 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f64.nxv2i64( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i64> %2, +    <vscale x 2 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v 
v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f64.nxv4i64( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i64>, +  <vscale x 4 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i64( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i64> %2, +    <vscale x 4 x i1> %3, +    i64 %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  i64); + +define void @intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i64> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f64.nxv8i64( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    i64 %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i64>, +  <vscale x 8 x i1>, +  i64); + +define void @intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei64.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i64( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i64> %2, +    <vscale x 8 x i1> %3, +    i64 %4) + +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsuxei.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsuxei.ll new file mode 100644 index 0000000..7cd1545 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsuxei.ll @@ -0,0 +1,4881 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i8.nxv1i32( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsuxei.mask.nxv1i8.nxv1i32( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i32( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i8.nxv2i32( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i32( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i32( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i8.nxv4i32( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i32( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i32( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i32> %2, 
iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i8.nxv8i32( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i32( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i32( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i8.nxv16i32( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i8.nxv16i32( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i8_nxv16i8_nxv16i32(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i8_nxv16i8_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i8.nxv16i32( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i16.nxv1i32( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i32( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i32(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, 
(a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i32( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i16.nxv2i32( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i32( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i32(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i32( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i16.nxv4i32( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i16.nxv4i32( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i32( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i32(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i32( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i16.nxv8i32( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsuxei.mask.nxv8i16.nxv8i32( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i32(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i16.nxv8i32( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i16_nxv16i16_nxv16i32(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i16.nxv16i32( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i16.nxv16i32( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i16_nxv16i16_nxv16i32(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i16_nxv16i16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i16.nxv16i32( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i32.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void 
@intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i32.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i32.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i32.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: 
# %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i32.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i32.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i32_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i32.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i64.nxv1i32( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i32( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i32( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i64.nxv2i32( +    
<vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i32( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i32( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i64.nxv4i32( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i32( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i32( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i64.nxv8i32( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i32( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i32( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f16.nxv1i32( +  <vscale x 1 
x half>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f16.nxv1i32( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i32( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i32(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i32( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f16.nxv2i32( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i32( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i32(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i32( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f16.nxv4i32( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f16.nxv4i32( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i32( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i32(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; 
CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i32( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f16.nxv8i32( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i32( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i32(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i32( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16f16_nxv16f16_nxv16i32(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16f16.nxv16i32( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16f16.nxv16i32( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16f16_nxv16f16_nxv16i32(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16f16_nxv16f16_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16f16.nxv16i32( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    
vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f32.nxv1i32( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i32>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i32( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f32.nxv2i32( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i32( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f32.nxv4i32( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i32( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 
x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f32.nxv8i32( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f32.nxv8i32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f32.nxv8i32( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16f32.nxv16i32( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16f32.nxv16i32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i32>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16f32_nxv16f32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16f32.nxv16i32( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i32> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f64.nxv1i32( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i32( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i32>, +  
<vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i32(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i32> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i32( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i32> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f64.nxv2i32( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f64.nxv2i32( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i32>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i32(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i32> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f64.nxv2i32( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i32> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i32> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f64.nxv4i32( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i32( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i32>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i32(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i32( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i32> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i32> %2, 
iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f64.nxv8i32( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i32( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i32>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i32(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i32> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i32( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i32> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i8.nxv1i16( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i16( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i16( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i8.nxv2i16( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i16( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; 
CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i16( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i8.nxv4i16( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i16( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i16( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i8.nxv8i16( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i16( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i16( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i8.nxv16i16( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i8.nxv16i16( +  <vscale x 16 x i8>, +  
ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i8_nxv16i8_nxv16i16(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i8_nxv16i8_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i8.nxv16i16( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv32i8.nxv32i16( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv32i8.nxv32i16( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv32i8_nxv32i8_nxv32i16(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv32i8_nxv32i8_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv32i8.nxv32i16( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i16.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x 
i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i16.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i16.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i16.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i16.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    
vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i16.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i16_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i16.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i16.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i16_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i16_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i16.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv32i16.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv32i16.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv32i16_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv32i16.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i32.nxv1i16( +    <vscale x 1 x i32> %0, +    ptr %1, +    
<vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i16( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i16( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i32.nxv2i16( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i16( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i16( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i32.nxv4i16( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i16( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i16( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  
iXLen); + +define void @intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i32.nxv8i16( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i16( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i16( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i32_nxv16i32_nxv16i16(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i32.nxv16i16( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i32.nxv16i16( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i32_nxv16i32_nxv16i16(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i32_nxv16i32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i32.nxv16i16( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i64.nxv1i16( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i16( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i16(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i16( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i64.nxv2i16( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i16( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i16(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i16( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i64.nxv4i16( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i16( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i16(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i16( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsuxei.nxv8i64.nxv8i16( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i16( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i16(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i16( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f16.nxv1i16( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i16( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f16.nxv2i16( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i16( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i16( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare 
void @llvm.riscv.vsuxei.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f16.nxv4i16( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i16( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f16.nxv8i16( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i16( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16f16_nxv16f16_nxv16i16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16f16.nxv16i16( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16f16.nxv16i16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16f16_nxv16f16_nxv16i16(<vscale x 16 x half> %0, 
ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16f16_nxv16f16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16f16.nxv16i16( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv32f16.nxv32i16( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv32f16.nxv32i16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i16>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv32f16_nxv32f16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv32f16.nxv32i16( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i16> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f32.nxv1i16( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i16( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i16(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i16( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       
# %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f32.nxv2i16( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i16( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i16(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i16( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f32.nxv4i16( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i16( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i16(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i16( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f32.nxv8i16( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f32.nxv8i16( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i16(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsuxei.mask.nxv8f32.nxv8i16( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16f32_nxv16f32_nxv16i16(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16f32.nxv16i16( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16f32.nxv16i16( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i16>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16f32_nxv16f32_nxv16i16(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16f32_nxv16f32_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16f32.nxv16i16( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i16> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f64.nxv1i16( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i16( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i16>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i16(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i16( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i16> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f64.nxv2i16( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vsuxei.mask.nxv2f64.nxv2i16( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i16>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i16(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f64.nxv2i16( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i16> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f64.nxv4i16( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i16( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i16>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i16(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i16( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i16> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i16> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f64.nxv8i16( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i16( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i16>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i16(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i16( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i16> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void 
@intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i8.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i8_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i8.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i8.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i8_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i8.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i8.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i8_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, 
(a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i8.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i8.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i8_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i8.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i8_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i8.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i8.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i8_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i8_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i8.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv32i8.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv32i8_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv32i8.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv32i8.nxv32i8( +  <vscale x 32 x 
i8>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv32i8_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv32i8_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv32i8.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv64i8.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    <vscale x 64 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv64i8.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i8>, +  <vscale x 64 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv64i8_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv64i8.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    <vscale x 64 x i8> %2, +    <vscale x 64 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i16.nxv1i8( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i8( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i16_nxv1i16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i16.nxv1i8( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; 
CHECK-LABEL: intrinsic_vsuxei_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i16.nxv2i8( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i8( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i16_nxv2i16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i16.nxv2i8( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i16.nxv4i8( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i8( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i16_nxv4i16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i16.nxv4i8( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i16.nxv8i8( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i16.nxv8i8( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i16_nxv8i16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsuxei.mask.nxv8i16.nxv8i8( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i16.nxv16i8( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i16.nxv16i8( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i16_nxv16i16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i16.nxv16i8( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv32i16.nxv32i8( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv32i16.nxv32i8( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv32i16_nxv32i16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv32i16.nxv32i8( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i32.nxv1i8( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i32.nxv1i8( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i8( +  
<vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i32_nxv1i32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1i32.nxv1i8( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i32.nxv2i8( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i8( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i32_nxv2i32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i32.nxv2i8( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i32.nxv4i8( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i8( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i32_nxv4i32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i8( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vsuxei_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i32.nxv8i8( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i8( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i32_nxv8i32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i32.nxv8i8( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16i32.nxv16i8( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16i32.nxv16i8( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16i32_nxv16i32_nxv16i8(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16i32_nxv16i32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16i32.nxv16i8( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1i64.nxv1i8( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i8( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1i64_nxv1i64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: + 
 call void @llvm.riscv.vsuxei.mask.nxv1i64.nxv1i8( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2i64.nxv2i8( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i8( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2i64_nxv2i64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2i64.nxv2i8( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4i64.nxv4i8( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i8( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4i64_nxv4i64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4i64.nxv4i8( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8i64.nxv8i8( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i8( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 
8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8i64_nxv8i64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8i64.nxv8i8( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f16.nxv1i8( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i8( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i8(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f16_nxv1f16_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f16.nxv1i8( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f16.nxv2i8( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i8( +  <vscale x 2 x half>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i8(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f16_nxv2f16_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f16.nxv2i8( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f16.nxv4i8( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f16_nxv4f16_nxv4i8: +; 
CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f16.nxv4i8( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i8( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i8(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f16_nxv4f16_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f16.nxv4i8( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f16.nxv8i8( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i8( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i8(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f16_nxv8f16_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f16.nxv8i8( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16f16_nxv16f16_nxv16i8(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16f16.nxv16i8( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16f16.nxv16i8( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16f16_nxv16f16_nxv16i8(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16f16_nxv16f16_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void 
@llvm.riscv.vsuxei.mask.nxv16f16.nxv16i8( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv32f16_nxv32f16_nxv32i8(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv32f16.nxv32i8( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv32f16.nxv32i8( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i8>, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv32f16_nxv32f16_nxv32i8(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv32f16_nxv32f16_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv32f16.nxv32i8( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i8> %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f32.nxv1i8( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i8( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i8(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f32_nxv1f32_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f32.nxv1i8( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f32.nxv2i8( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f32.nxv2i8( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i8( +  <vscale x 
2 x float>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i8(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f32_nxv2f32_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f32.nxv2i8( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f32.nxv4i8( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i8( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i8(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f32_nxv4f32_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f32.nxv4i8( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f32.nxv8i8( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f32.nxv8i8( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i8(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f32_nxv8f32_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f32.nxv8i8( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv16f32_nxv16f32_nxv16i8(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i8> %2, 
iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv16f32.nxv16i8( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv16f32.nxv16i8( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i8>, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv16f32_nxv16f32_nxv16i8(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv16f32_nxv16f32_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv16f32.nxv16i8( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i8> %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv1f64.nxv1i8( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i8( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i8>, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i8(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv1f64_nxv1f64_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v9, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv1f64.nxv1i8( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i8> %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v10 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv2f64.nxv2i8( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv2f64.nxv2i8( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i8>, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i8(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv2f64_nxv2f64_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; 
CHECK-NEXT:    vsuxei8.v v8, (a0), v10, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv2f64.nxv2i8( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i8> %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv4f64.nxv4i8( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i8( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i8>, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i8(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv4f64_nxv4f64_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v12, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv4f64.nxv4i8( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i8> %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsuxei.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  iXLen); + +define void @intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i8> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.nxv8f64.nxv8i8( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i8( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i8>, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i8(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsuxei_mask_v_nxv8f64_nxv8f64_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vsuxei8.v v8, (a0), v16, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsuxei.mask.nxv8f64.nxv8i8( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i8> %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/rv64-stackmap-fp.ll b/llvm/test/CodeGen/RISCV/rv64-stackmap-fp.ll new file mode 100644 index 0000000..bf0a2e5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64-stackmap-fp.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh < %s | FileCheck %s + +; CHECK-LABEL:  .section	.llvm_stackmaps +; CHECK-NEXT:  __LLVM_StackMaps: +; Header +; CHECK-NEXT:   .byte   3 +; CHECK-NEXT:   .byte   0 +; CHECK-NEXT:   .half   0 +; Num Functions +; CHECK-NEXT:   .word   1 +; Num LargeConstants +; CHECK-NEXT:   .word   0 +; Num Callsites +; CHECK-NEXT:   .word   1 + +; Functions and 
stack size +; CHECK-NEXT:   .quad   liveArgs +; CHECK-NEXT:   .quad   0 +; CHECK-NEXT:   .quad   1 + +; Spilled stack map values. +; +; Verify 3 stack map entries. +; +; CHECK-LABEL:  .word   .L{{.*}}-liveArgs +; CHECK-NEXT:   .half   0 +; CHECK-NEXT:   .half   25 +; +; Check that at least one is a spilled entry from SP. +; Location: Indirect SP + ... +; CHECK:        .byte   3 +; CHECK-NEXT:   .byte   0 +; CHECK-NEXT:   .half   8 +; CHECK-NEXT:   .half   2 +; CHECK-NEXT:   .half   0 +; CHECK-NEXT:   .word +define void @liveArgs(double %arg0, double %arg1, double %arg2, double %arg3, double %arg4, double %arg5, double %arg6, double %arg7, double %arg8, double %arg9, double %arg10, double %arg11, double %arg12, double %arg13, double %arg14, double %arg15, double %arg16, double %arg17, double %arg18, double %arg19, double %arg20, double %arg21, double %arg22, double %arg23, half %arg24, half %arg25, half %arg26, half %arg27, half %arg28, bfloat %arg29) { +entry: +  call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, double %arg0, double %arg1, double %arg2, double %arg3, double %arg4, double %arg5, double %arg6, double %arg7, double %arg8, double %arg9, double %arg10, double %arg11, double %arg12, double %arg13, double %arg14, double %arg15, double %arg16, double %arg17, double %arg18, double %arg19, double %arg20, double %arg21, double %arg22, double %arg23, half %arg24, half %arg25, half %arg26, half %arg27, half %arg28, bfloat %arg29) +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll index c50a0fb3..320a3aa 100644 --- a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll +++ b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll @@ -286,8 +286,8 @@ define void @liveConstant() {  ; CHECK-NEXT:   .half   0  ; CHECK-NEXT:   .half   28  ; -; Check that at least one is a spilled entry from RBP. -; Location: Indirect RBP + ... +; Check that at least one is a spilled entry from SP. +; Location: Indirect SP + ...  
; CHECK:        .byte   3  ; CHECK-NEXT:   .byte   0  ; CHECK-NEXT:   .half   8 @@ -307,7 +307,7 @@ entry:  ; CHECK-NEXT:   .half   0  ; 1 location  ; CHECK-NEXT:   .half   1 -; Loc 0: Direct RBP - ofs +; Loc 0: Direct SP + ofs  ; CHECK-NEXT:   .byte   2  ; CHECK-NEXT:   .byte   0  ; CHECK-NEXT:   .half   8 @@ -320,14 +320,14 @@ entry:  ; CHECK-NEXT:   .half   0  ; 2 locations  ; CHECK-NEXT:   .half   2 -; Loc 0: Direct RBP - ofs +; Loc 0: Direct SP + ofs  ; CHECK-NEXT:   .byte   2  ; CHECK-NEXT:   .byte   0  ; CHECK-NEXT:   .half   8  ; CHECK-NEXT:   .half   2  ; CHECK-NEXT:   .half   0  ; CHECK-NEXT:   .word -; Loc 1: Direct RBP - ofs +; Loc 1: Direct SP + ofs  ; CHECK-NEXT:   .byte   2  ; CHECK-NEXT:   .byte   0  ; CHECK-NEXT:   .half   8 diff --git a/llvm/test/CodeGen/SPIRV/pointers/composite-fun-fix-ptr-arg.ll b/llvm/test/CodeGen/SPIRV/pointers/composite-fun-fix-ptr-arg.ll index 73c46b1..c9b2968 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/composite-fun-fix-ptr-arg.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/composite-fun-fix-ptr-arg.ll @@ -10,6 +10,7 @@  ; CHECK-DAG: %[[#Int8:]] = OpTypeInt 8 0  ; CHECK-DAG: %[[#Half:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32  ; CHECK-DAG: %[[#Struct:]] = OpTypeStruct %[[#Half]]  ; CHECK-DAG: %[[#Void:]] = OpTypeVoid  ; CHECK-DAG: %[[#PtrInt8:]] = OpTypePointer CrossWorkgroup %[[#Int8:]] @@ -17,12 +18,20 @@  ; CHECK-DAG: %[[#Int64:]] = OpTypeInt 64 0  ; CHECK-DAG: %[[#PtrInt64:]] = OpTypePointer CrossWorkgroup %[[#Int64]]  ; CHECK-DAG: %[[#BarType:]] = OpTypeFunction %[[#Void]] %[[#PtrInt64]] %[[#Struct]] +; CHECK-DAG: %[[#BazType:]] = OpTypeFunction %[[#Void]] %[[#PtrInt8]] %[[#Struct]] %[[#Int8]] %[[#Struct]] %[[#Float]] %[[#Struct]]  ; CHECK: OpFunction %[[#Void]] None %[[#FooType]]  ; CHECK: OpFunctionParameter %[[#PtrInt8]]  ; CHECK: OpFunctionParameter %[[#Struct]]  ; CHECK: OpFunction %[[#Void]] None %[[#BarType]]  ; CHECK: OpFunctionParameter %[[#PtrInt64]]  ; CHECK: OpFunctionParameter %[[#Struct]] +; CHECK: OpFunction %[[#Void]] None %[[#BazType]] +; CHECK: OpFunctionParameter %[[#PtrInt8]] +; CHECK: OpFunctionParameter %[[#Struct]] +; CHECK: OpFunctionParameter %[[#Int8]] +; CHECK: OpFunctionParameter %[[#Struct]] +; CHECK: OpFunctionParameter %[[#Float]] +; CHECK: OpFunctionParameter %[[#Struct]]  %t_half = type { half } @@ -38,4 +47,9 @@ entry:    ret void  } +define spir_kernel void @baz(ptr addrspace(1) %a, %t_half %b, i8 %c, %t_half %d, float %e, %t_half %f) { +entry: +  ret void +} +  declare spir_func %t_half @_Z29__spirv_SpecConstantComposite(half) diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index 8007d9d..c311ab8 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -203,24 +203,14 @@ define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind {  define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {  ; X86-LABEL: test_ne_i64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %esi  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT:    movl $1, %edx -; X86-NEXT:    xorl %esi, %esi -; X86-NEXT:    shldl %cl, %edx, %esi -; X86-NEXT:    shll %cl, %edx -; X86-NEXT:    testb $32, %cl -; X86-NEXT:    je .LBB5_2 -; X86-NEXT:  # %bb.1: -; X86-NEXT:    movl %edx, %esi -; X86-NEXT:    xorl %edx, %edx -; X86-NEXT:  .LBB5_2: -; X86-NEXT:    andl 4(%eax), %esi -; X86-NEXT:    andl (%eax), %edx -; X86-NEXT:    orl %esi, %edx -; X86-NEXT:    
setne %al -; X86-NEXT:    popl %esi +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl %ecx, %edx +; X86-NEXT:    andl $32, %edx +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    movl (%eax,%edx), %eax +; X86-NEXT:    btl %ecx, %eax +; X86-NEXT:    setb %al  ; X86-NEXT:    retl  ;  ; X64-LABEL: test_ne_i64: @@ -242,38 +232,20 @@ define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {  define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {  ; X86-LABEL: complement_ne_i64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx -; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT:    movl $1, %eax -; X86-NEXT:    xorl %esi, %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    shll %cl, %eax -; X86-NEXT:    testb $32, %cl -; X86-NEXT:    je .LBB6_2 -; X86-NEXT:  # %bb.1: -; X86-NEXT:    movl %eax, %esi -; X86-NEXT:    xorl %eax, %eax -; X86-NEXT:  .LBB6_2: -; X86-NEXT:    movl (%edx), %ecx -; X86-NEXT:    movl 4(%edx), %edi -; X86-NEXT:    movl %edi, %ebx -; X86-NEXT:    andl %esi, %ebx -; X86-NEXT:    movl %ecx, %ebp -; X86-NEXT:    andl %eax, %ebp -; X86-NEXT:    xorl %esi, %edi -; X86-NEXT:    xorl %eax, %ecx -; X86-NEXT:    orl %ebx, %ebp -; X86-NEXT:    setne %al -; X86-NEXT:    movl %ecx, (%edx) -; X86-NEXT:    movl %edi, 4(%edx) +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    andl $32, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setb %al +; X86-NEXT:    btcl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ;  ; X64-LABEL: complement_ne_i64: @@ -300,40 +272,20 @@ define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {  define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {  ; X86-LABEL: reset_eq_i64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx -; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT:    movl $1, %esi -; X86-NEXT:    xorl %edi, %edi -; X86-NEXT:    shldl %cl, %esi, %edi -; X86-NEXT:    shll %cl, %esi -; X86-NEXT:    testb $32, %cl -; X86-NEXT:    je .LBB7_2 -; X86-NEXT:  # %bb.1: -; X86-NEXT:    movl %esi, %edi -; X86-NEXT:    xorl %esi, %esi -; X86-NEXT:  .LBB7_2: -; X86-NEXT:    movl (%edx), %eax -; X86-NEXT:    movl 4(%edx), %ecx -; X86-NEXT:    movl %ecx, %ebx -; X86-NEXT:    andl %edi, %ebx -; X86-NEXT:    notl %edi -; X86-NEXT:    movl %eax, %ebp -; X86-NEXT:    andl %esi, %ebp -; X86-NEXT:    notl %esi -; X86-NEXT:    andl %ecx, %edi -; X86-NEXT:    andl %eax, %esi -; X86-NEXT:    orl %ebx, %ebp -; X86-NEXT:    sete %al -; X86-NEXT:    movl %esi, (%edx) -; X86-NEXT:    movl %edi, 4(%edx) +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    andl $32, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setae %al +; X86-NEXT:    btrl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ;  ; X64-LABEL: reset_eq_i64: @@ -361,38 +313,20 @@ define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {  
define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {  ; X86-LABEL: set_ne_i64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx -; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT:    movl $1, %eax -; X86-NEXT:    xorl %esi, %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    shll %cl, %eax -; X86-NEXT:    testb $32, %cl -; X86-NEXT:    je .LBB8_2 -; X86-NEXT:  # %bb.1: -; X86-NEXT:    movl %eax, %esi -; X86-NEXT:    xorl %eax, %eax -; X86-NEXT:  .LBB8_2: -; X86-NEXT:    movl (%edx), %ecx -; X86-NEXT:    movl 4(%edx), %edi -; X86-NEXT:    movl %edi, %ebx -; X86-NEXT:    andl %esi, %ebx -; X86-NEXT:    movl %ecx, %ebp -; X86-NEXT:    andl %eax, %ebp -; X86-NEXT:    orl %esi, %edi -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    orl %ebx, %ebp -; X86-NEXT:    setne %al -; X86-NEXT:    movl %ecx, (%edx) -; X86-NEXT:    movl %edi, 4(%edx) +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    andl $32, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setb %al +; X86-NEXT:    btsl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ;  ; X64-LABEL: set_ne_i64: @@ -419,52 +353,47 @@ define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {  define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-LABEL: init_eq_i64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp  ; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT:    movl $1, %eax -; X86-NEXT:    xorl %edx, %edx -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    shll %cl, %eax -; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %esi +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl $1, %edx +; X86-NEXT:    xorl %esi, %esi +; X86-NEXT:    shldl %cl, %edx, %esi +; X86-NEXT:    shll %cl, %edx +; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax  ; X86-NEXT:    xorl %edi, %edi -; X86-NEXT:    shldl %cl, %esi, %edi -; X86-NEXT:    shll %cl, %esi +; X86-NEXT:    shldl %cl, %eax, %edi +; X86-NEXT:    shll %cl, %eax  ; X86-NEXT:    testb $32, %cl  ; X86-NEXT:    je .LBB9_2  ; X86-NEXT:  # %bb.1: -; X86-NEXT:    movl %eax, %edx -; X86-NEXT:    movl $0, %eax +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    movl $0, %edx  ; X86-NEXT:  .LBB9_2: -; X86-NEXT:    movl %edx, %ebx -; X86-NEXT:    notl %ebx -; X86-NEXT:    movl %eax, %ebp -; X86-NEXT:    notl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT:    notl %esi +; X86-NEXT:    notl %edx  ; X86-NEXT:    je .LBB9_4  ; X86-NEXT:  # %bb.3: -; X86-NEXT:    movl %esi, %edi -; X86-NEXT:    xorl %esi, %esi +; X86-NEXT:    movl %eax, %edi +; X86-NEXT:    xorl %eax, %eax  ; X86-NEXT:  .LBB9_4: -; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT:    movl 4(%ecx), %ecx -; X86-NEXT:    andl %ecx, %edx -; X86-NEXT:    andl %ecx, %ebx -; X86-NEXT:    orl %edi, %ebx -; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi -; X86-NEXT:    movl (%edi), %ecx -; X86-NEXT:    andl %ecx, %eax -; X86-NEXT:    andl %ecx, %ebp -; X86-NEXT:    orl %esi, %ebp -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl %ebp, (%edi) -; X86-NEXT:    movl %ebx, 4(%edi) -; X86-NEXT:    sete %al +; X86-NEXT:    andl 4(%ebx), %esi +; X86-NEXT:    orl %edi, 
%esi +; X86-NEXT:    andl (%ebx), %edx +; X86-NEXT:    orl %eax, %edx +; X86-NEXT:    movl %ecx, %eax +; X86-NEXT:    andl $32, %eax +; X86-NEXT:    shrl $3, %eax +; X86-NEXT:    movl (%ebx,%eax), %eax +; X86-NEXT:    btl %ecx, %eax +; X86-NEXT:    setae %al +; X86-NEXT:    movl %edx, (%ebx) +; X86-NEXT:    movl %esi, 4(%ebx)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi  ; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ;  ; SSE-LABEL: init_eq_i64: @@ -516,101 +445,25 @@ define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {  define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {  ; X86-LABEL: test_ne_i128:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $48, %esp -; X86-NEXT:    movzbl 12(%ebp), %ecx -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, (%esp) -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrb $3, %al -; X86-NEXT:    andb $12, %al -; X86-NEXT:    negb %al -; X86-NEXT:    movsbl %al, %esi -; X86-NEXT:    movl 24(%esp,%esi), %edi -; X86-NEXT:    movl 28(%esp,%esi), %eax -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl 16(%esp,%esi), %edx -; X86-NEXT:    movl 20(%esp,%esi), %esi -; X86-NEXT:    shldl %cl, %esi, %edi -; X86-NEXT:    shldl %cl, %edx, %esi -; X86-NEXT:    movl 8(%ebp), %ebx -; X86-NEXT:    shll %cl, %edx -; X86-NEXT:    andl 8(%ebx), %edi -; X86-NEXT:    andl (%ebx), %edx -; X86-NEXT:    orl %edi, %edx -; X86-NEXT:    andl 12(%ebx), %eax -; X86-NEXT:    andl 4(%ebx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl %edx, %esi -; X86-NEXT:    setne %al -; X86-NEXT:    leal -12(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl %ecx, %edx +; X86-NEXT:    andl $96, %edx +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    movl (%eax,%edx), %eax +; X86-NEXT:    btl %ecx, %eax +; X86-NEXT:    setb %al  ; X86-NEXT:    retl  ; -; SSE-LABEL: test_ne_i128: -; SSE:       # %bb.0: -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    movl $1, %eax -; SSE-NEXT:    xorl %edx, %edx -; SSE-NEXT:    shldq %cl, %rax, %rdx -; SSE-NEXT:    xorl %esi, %esi -; SSE-NEXT:    shlq %cl, %rax -; SSE-NEXT:    testb $64, %cl -; SSE-NEXT:    cmovneq %rax, %rdx -; SSE-NEXT:    cmovneq %rsi, %rax -; SSE-NEXT:    andq 8(%rdi), %rdx -; SSE-NEXT:    andq (%rdi), %rax -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    setne %al -; SSE-NEXT:    retq -; -; AVX2-LABEL: test_ne_i128: -; AVX2:       # %bb.0: -; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    xorl %eax, %eax -; AVX2-NEXT:    movl $1, %edx -; AVX2-NEXT:    xorl %esi, %esi -; AVX2-NEXT:    shldq %cl, %rdx, %rsi -; AVX2-NEXT:    shlxq %rcx, %rdx, %rdx -; AVX2-NEXT:    testb $64, %cl -; AVX2-NEXT:    cmovneq %rdx, %rsi -; AVX2-NEXT:    cmovneq %rax, %rdx -; AVX2-NEXT:    andq 8(%rdi), %rsi -; AVX2-NEXT:    andq (%rdi), %rdx -; AVX2-NEXT:    orq %rsi, %rdx -; AVX2-NEXT:    setne %al -; AVX2-NEXT:    retq -; -; AVX512-LABEL: test_ne_i128: -; AVX512:       # %bb.0: -; AVX512-NEXT:    
movl %esi, %ecx -; AVX512-NEXT:    movl $1, %eax -; AVX512-NEXT:    xorl %edx, %edx -; AVX512-NEXT:    shldq %cl, %rax, %rdx -; AVX512-NEXT:    xorl %esi, %esi -; AVX512-NEXT:    shlxq %rcx, %rax, %rax -; AVX512-NEXT:    testb $64, %cl -; AVX512-NEXT:    cmovneq %rax, %rdx -; AVX512-NEXT:    cmovneq %rsi, %rax -; AVX512-NEXT:    andq 8(%rdi), %rdx -; AVX512-NEXT:    andq (%rdi), %rax -; AVX512-NEXT:    orq %rdx, %rax -; AVX512-NEXT:    setne %al -; AVX512-NEXT:    retq +; X64-LABEL: test_ne_i128: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %eax +; X64-NEXT:    andl $96, %eax +; X64-NEXT:    shrl $3, %eax +; X64-NEXT:    movl (%rdi,%rax), %eax +; X64-NEXT:    btl %esi, %eax +; X64-NEXT:    setb %al +; X64-NEXT:    retq    %rem = and i32 %position, 127    %ofs = zext nneg i32 %rem to i128    %bit = shl nuw i128 1, %ofs @@ -623,124 +476,33 @@ define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {  define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind {  ; X86-LABEL: complement_ne_i128:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $80, %esp -; X86-NEXT:    movzbl 12(%ebp), %ecx -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrb $3, %al -; X86-NEXT:    andb $12, %al -; X86-NEXT:    negb %al -; X86-NEXT:    movsbl %al, %eax -; X86-NEXT:    movl 56(%esp,%eax), %esi -; X86-NEXT:    movl 60(%esp,%eax), %edx -; X86-NEXT:    shldl %cl, %esi, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%esp,%eax), %edi -; X86-NEXT:    movl 52(%esp,%eax), %ebx -; X86-NEXT:    shldl %cl, %ebx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %ebx -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    shll %cl, %edi -; X86-NEXT:    movl %eax, %ecx -; X86-NEXT:    movl 8(%eax), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %eax -; X86-NEXT:    movl (%ecx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %ecx, %esi -; X86-NEXT:    movl %edx, %ecx -; X86-NEXT:    andl %edi, %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl 12(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 4(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %eax -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl %edx, 8(%eax) -; X86-NEXT:    movl %esi, 
12(%eax) -; X86-NEXT:    movl %edi, (%eax) -; X86-NEXT:    movl %ebx, 4(%eax) -; X86-NEXT:    setne %al -; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    andl $96, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setb %al +; X86-NEXT:    btcl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ; -; SSE-LABEL: complement_ne_i128: -; SSE:       # %bb.0: -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    movl $1, %edx -; SSE-NEXT:    xorl %esi, %esi -; SSE-NEXT:    shldq %cl, %rdx, %rsi -; SSE-NEXT:    shlq %cl, %rdx -; SSE-NEXT:    xorl %eax, %eax -; SSE-NEXT:    testb $64, %cl -; SSE-NEXT:    cmovneq %rdx, %rsi -; SSE-NEXT:    cmovneq %rax, %rdx -; SSE-NEXT:    movq (%rdi), %rax -; SSE-NEXT:    movq 8(%rdi), %rcx -; SSE-NEXT:    movq %rcx, %r8 -; SSE-NEXT:    andq %rsi, %r8 -; SSE-NEXT:    movq %rax, %r9 -; SSE-NEXT:    andq %rdx, %r9 -; SSE-NEXT:    xorq %rcx, %rsi -; SSE-NEXT:    xorq %rax, %rdx -; SSE-NEXT:    orq %r8, %r9 -; SSE-NEXT:    setne %al -; SSE-NEXT:    movq %rdx, (%rdi) -; SSE-NEXT:    movq %rsi, 8(%rdi) -; SSE-NEXT:    retq -; -; AVX-LABEL: complement_ne_i128: -; AVX:       # %bb.0: -; AVX-NEXT:    movl %esi, %ecx -; AVX-NEXT:    xorl %eax, %eax -; AVX-NEXT:    movl $1, %edx -; AVX-NEXT:    xorl %esi, %esi -; AVX-NEXT:    shldq %cl, %rdx, %rsi -; AVX-NEXT:    shlxq %rcx, %rdx, %rdx -; AVX-NEXT:    testb $64, %cl -; AVX-NEXT:    cmovneq %rdx, %rsi -; AVX-NEXT:    cmovneq %rax, %rdx -; AVX-NEXT:    movq (%rdi), %rax -; AVX-NEXT:    movq 8(%rdi), %rcx -; AVX-NEXT:    movq %rcx, %r8 -; AVX-NEXT:    andq %rsi, %r8 -; AVX-NEXT:    movq %rax, %r9 -; AVX-NEXT:    andq %rdx, %r9 -; AVX-NEXT:    xorq %rcx, %rsi -; AVX-NEXT:    xorq %rax, %rdx -; AVX-NEXT:    orq %r8, %r9 -; AVX-NEXT:    setne %al -; AVX-NEXT:    movq %rdx, (%rdi) -; AVX-NEXT:    movq %rsi, 8(%rdi) -; AVX-NEXT:    retq +; X64-LABEL: complement_ne_i128: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %ecx +; X64-NEXT:    andl $96, %ecx +; X64-NEXT:    shrl $3, %ecx +; X64-NEXT:    movl (%rdi,%rcx), %edx +; X64-NEXT:    btl %esi, %edx +; X64-NEXT:    setb %al +; X64-NEXT:    btcl %esi, %edx +; X64-NEXT:    movl %edx, (%rdi,%rcx) +; X64-NEXT:    retq    %rem = and i32 %position, 127    %ofs = zext nneg i32 %rem to i128    %bit = shl nuw i128 1, %ofs @@ -755,124 +517,33 @@ define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind {  define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind {  ; X86-LABEL: reset_eq_i128:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $80, %esp -; X86-NEXT:    movzbl 12(%ebp), %ecx -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrb $3, %al -; X86-NEXT:    andb $12, %al -; X86-NEXT:    negb %al -; X86-NEXT:    movsbl %al, %eax -; X86-NEXT:    movl 56(%esp,%eax), %edx -; 
X86-NEXT:    movl 60(%esp,%eax), %esi -; X86-NEXT:    shldl %cl, %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%esp,%eax), %esi -; X86-NEXT:    movl 52(%esp,%eax), %edi -; X86-NEXT:    shldl %cl, %edi, %edx -; X86-NEXT:    shldl %cl, %esi, %edi -; X86-NEXT:    movl 8(%ebp), %ebx -; X86-NEXT:    shll %cl, %esi -; X86-NEXT:    movl 8(%ebx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %eax -; X86-NEXT:    movl (%ebx), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%ebx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl %edi, %ecx -; X86-NEXT:    movl 4(%ebx), %ebx -; X86-NEXT:    andl %ebx, %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    notl %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    notl %ecx -; X86-NEXT:    andl %ebx, %ecx -; X86-NEXT:    notl %esi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    movl 8(%ebp), %edi -; X86-NEXT:    movl %edx, 8(%edi) -; X86-NEXT:    movl %eax, 12(%edi) -; X86-NEXT:    movl %esi, (%edi) -; X86-NEXT:    movl %ecx, 4(%edi) -; X86-NEXT:    sete %al -; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    andl $96, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setae %al +; X86-NEXT:    btrl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ; -; SSE-LABEL: reset_eq_i128: -; SSE:       # %bb.0: -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    movl $1, %edx -; SSE-NEXT:    xorl %esi, %esi -; SSE-NEXT:    shldq %cl, %rdx, %rsi -; SSE-NEXT:    xorl %eax, %eax -; SSE-NEXT:    shlq %cl, %rdx -; SSE-NEXT:    testb $64, %cl -; SSE-NEXT:    cmovneq %rdx, %rsi -; SSE-NEXT:    cmovneq %rax, %rdx -; SSE-NEXT:    movq (%rdi), %rax -; SSE-NEXT:    movq 8(%rdi), %rcx -; SSE-NEXT:    movq %rcx, %r8 -; SSE-NEXT:    andq %rsi, %r8 -; SSE-NEXT:    notq %rsi -; SSE-NEXT:    movq %rax, %r9 -; SSE-NEXT:    andq %rdx, %r9 -; SSE-NEXT:    notq %rdx -; SSE-NEXT:    andq %rcx, %rsi -; SSE-NEXT:    andq %rax, %rdx -; SSE-NEXT:    orq %r8, %r9 -; SSE-NEXT:    sete %al -; SSE-NEXT:    movq %rdx, (%rdi) -; SSE-NEXT:    movq %rsi, 8(%rdi) -; SSE-NEXT:    retq -; -; AVX-LABEL: reset_eq_i128: -; AVX:       # %bb.0: -; AVX-NEXT:    movl %esi, %ecx -; AVX-NEXT:    xorl %eax, %eax -; AVX-NEXT:    movl $1, %edx -; AVX-NEXT:    xorl %esi, %esi -; AVX-NEXT:    shldq %cl, %rdx, %rsi -; AVX-NEXT:    shlxq %rcx, %rdx, %rdx -; AVX-NEXT:    testb $64, %cl -; AVX-NEXT:    cmovneq %rdx, %rsi -; AVX-NEXT:    cmovneq %rax, %rdx -; AVX-NEXT:    movq (%rdi), %rax -; AVX-NEXT:    movq 8(%rdi), %rcx -; AVX-NEXT:    andnq %rcx, %rsi, %r8 -; AVX-NEXT:    andq %rsi, %rcx 
-; AVX-NEXT:    andnq %rax, %rdx, %rsi -; AVX-NEXT:    andq %rdx, %rax -; AVX-NEXT:    orq %rcx, %rax -; AVX-NEXT:    sete %al -; AVX-NEXT:    movq %rsi, (%rdi) -; AVX-NEXT:    movq %r8, 8(%rdi) -; AVX-NEXT:    retq +; X64-LABEL: reset_eq_i128: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %ecx +; X64-NEXT:    andl $96, %ecx +; X64-NEXT:    shrl $3, %ecx +; X64-NEXT:    movl (%rdi,%rcx), %edx +; X64-NEXT:    btl %esi, %edx +; X64-NEXT:    setae %al +; X64-NEXT:    btrl %esi, %edx +; X64-NEXT:    movl %edx, (%rdi,%rcx) +; X64-NEXT:    retq    %rem = and i32 %position, 127    %ofs = zext nneg i32 %rem to i128    %bit = shl nuw i128 1, %ofs @@ -888,124 +559,33 @@ define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind {  define i1 @set_ne_i128(ptr %word, i32 %position) nounwind {  ; X86-LABEL: set_ne_i128:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $80, %esp -; X86-NEXT:    movzbl 12(%ebp), %ecx -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrb $3, %al -; X86-NEXT:    andb $12, %al -; X86-NEXT:    negb %al -; X86-NEXT:    movsbl %al, %eax -; X86-NEXT:    movl 56(%esp,%eax), %esi -; X86-NEXT:    movl 60(%esp,%eax), %edx -; X86-NEXT:    shldl %cl, %esi, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%esp,%eax), %edi -; X86-NEXT:    movl 52(%esp,%eax), %ebx -; X86-NEXT:    shldl %cl, %ebx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %ebx -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    shll %cl, %edi -; X86-NEXT:    movl %eax, %ecx -; X86-NEXT:    movl 8(%eax), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %eax -; X86-NEXT:    movl (%ecx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %ecx, %esi -; X86-NEXT:    movl %edx, %ecx -; X86-NEXT:    andl %edi, %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl 12(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 4(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %eax -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl %edx, 8(%eax) -; X86-NEXT:    movl %esi, 12(%eax) -; X86-NEXT:    movl %edi, (%eax) -; X86-NEXT:    movl %ebx, 4(%eax) -; X86-NEXT:    setne %al -; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl 
{{[0-9]+}}(%esp), %edx +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    andl $96, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setb %al +; X86-NEXT:    btsl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ; -; SSE-LABEL: set_ne_i128: -; SSE:       # %bb.0: -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    movl $1, %edx -; SSE-NEXT:    xorl %esi, %esi -; SSE-NEXT:    shldq %cl, %rdx, %rsi -; SSE-NEXT:    shlq %cl, %rdx -; SSE-NEXT:    xorl %eax, %eax -; SSE-NEXT:    testb $64, %cl -; SSE-NEXT:    cmovneq %rdx, %rsi -; SSE-NEXT:    cmovneq %rax, %rdx -; SSE-NEXT:    movq (%rdi), %rax -; SSE-NEXT:    movq 8(%rdi), %rcx -; SSE-NEXT:    movq %rcx, %r8 -; SSE-NEXT:    andq %rsi, %r8 -; SSE-NEXT:    movq %rax, %r9 -; SSE-NEXT:    andq %rdx, %r9 -; SSE-NEXT:    orq %rcx, %rsi -; SSE-NEXT:    orq %rax, %rdx -; SSE-NEXT:    orq %r8, %r9 -; SSE-NEXT:    setne %al -; SSE-NEXT:    movq %rdx, (%rdi) -; SSE-NEXT:    movq %rsi, 8(%rdi) -; SSE-NEXT:    retq -; -; AVX-LABEL: set_ne_i128: -; AVX:       # %bb.0: -; AVX-NEXT:    movl %esi, %ecx -; AVX-NEXT:    xorl %eax, %eax -; AVX-NEXT:    movl $1, %edx -; AVX-NEXT:    xorl %esi, %esi -; AVX-NEXT:    shldq %cl, %rdx, %rsi -; AVX-NEXT:    shlxq %rcx, %rdx, %rdx -; AVX-NEXT:    testb $64, %cl -; AVX-NEXT:    cmovneq %rdx, %rsi -; AVX-NEXT:    cmovneq %rax, %rdx -; AVX-NEXT:    movq (%rdi), %rax -; AVX-NEXT:    movq 8(%rdi), %rcx -; AVX-NEXT:    movq %rcx, %r8 -; AVX-NEXT:    andq %rsi, %r8 -; AVX-NEXT:    movq %rax, %r9 -; AVX-NEXT:    andq %rdx, %r9 -; AVX-NEXT:    orq %rcx, %rsi -; AVX-NEXT:    orq %rax, %rdx -; AVX-NEXT:    orq %r8, %r9 -; AVX-NEXT:    setne %al -; AVX-NEXT:    movq %rdx, (%rdi) -; AVX-NEXT:    movq %rsi, 8(%rdi) -; AVX-NEXT:    retq +; X64-LABEL: set_ne_i128: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %ecx +; X64-NEXT:    andl $96, %ecx +; X64-NEXT:    shrl $3, %ecx +; X64-NEXT:    movl (%rdi,%rcx), %edx +; X64-NEXT:    btl %esi, %edx +; X64-NEXT:    setb %al +; X64-NEXT:    btsl %esi, %edx +; X64-NEXT:    movl %edx, (%rdi,%rcx) +; X64-NEXT:    retq    %rem = and i32 %position, 127    %ofs = zext nneg i32 %rem to i128    %bit = shl nuw i128 1, %ofs @@ -1026,9 +606,9 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi  ; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $128, %esp -; X86-NEXT:    movzbl 12(%ebp), %ecx -; X86-NEXT:    movzbl 16(%ebp), %eax +; X86-NEXT:    subl $96, %esp +; X86-NEXT:    movl 12(%ebp), %ecx +; X86-NEXT:    movzbl 16(%ebp), %ebx  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) @@ -1037,25 +617,30 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, %edx -; X86-NEXT:    shrb $3, %dl -; X86-NEXT:    andb $12, %dl -; X86-NEXT:    negb %dl -; X86-NEXT:    movsbl %dl, %esi -; X86-NEXT:    movl 64(%esp,%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 68(%esp,%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 72(%esp,%esi), %ebx +; X86-NEXT:    movl %ecx, %eax +; X86-NEXT:    
shrb $3, %al +; X86-NEXT:    andb $12, %al +; X86-NEXT:    negb %al +; X86-NEXT:    movsbl %al, %eax +; X86-NEXT:    movl 64(%esp,%eax), %edx +; X86-NEXT:    movl 68(%esp,%eax), %esi  ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movzbl %al, %eax -; X86-NEXT:    movl 76(%esp,%esi), %edi +; X86-NEXT:    movl %eax, %esi +; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill +; X86-NEXT:    movzbl %bl, %eax +; X86-NEXT:    movl 72(%esp,%esi), %ebx +; X86-NEXT:    movl 76(%esp,%esi), %esi  ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ebx, %eax +; X86-NEXT:    movl %ebx, %edi +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    shldl %cl, %eax, %edi +; X86-NEXT:    shldl %cl, %ebx, %esi +; X86-NEXT:    movl %edx, %ebx +; X86-NEXT:    shll %cl, %ebx +; X86-NEXT:    # kill: def $cl killed $cl killed $ecx  ; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    shldl %cl, %ebx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shll %cl, %edx +; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    notl %edi  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) @@ -1063,72 +648,59 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 8(%ebp), %esi +; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload +; X86-NEXT:    movl 36(%esp,%ecx), %eax  ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%esi), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %eax +; X86-NEXT:    movl 40(%esp,%ecx), %edx  ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%esi), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    movl 12(%ebp), %ecx +; X86-NEXT:    # kill: def $cl killed $cl killed $ecx +; X86-NEXT:    shldl %cl, %eax, %edx +; X86-NEXT:    movl 8(%ebp), %eax +; X86-NEXT:    andl 8(%eax), %edi +; X86-NEXT:    orl %edx, %edi +; X86-NEXT:    notl %esi +; X86-NEXT:    movl (%esp), %eax # 4-byte Reload +; X86-NEXT:    movl 44(%esp,%eax), %eax +; X86-NEXT:    movl 12(%ebp), %ecx +; X86-NEXT:    # kill: def $cl killed $cl killed $ecx  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %edi, %esi  ; X86-NEXT:    movl 8(%ebp), %ecx -; X86-NEXT:    movl 12(%ecx), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %edi -; X86-NEXT:    movl %eax, %ebx -; X86-NEXT:    movl %eax, %edx -; X86-NEXT:    movl 4(%ecx), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %ebx -; X86-NEXT:    orl %edi, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    notl %ecx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl 100(%esp,%ecx), %edi -; X86-NEXT:    movl 104(%esp,%ecx), %ecx -; X86-NEXT:    movl %ecx, %ebx -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    movzbl 12(%ebp), %ecx -; X86-NEXT:    shldl %cl, %edi, %ebx -; X86-NEXT:    orl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    notl %esi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    movl 108(%esp,%ebx), %ebx -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    orl %ebx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    notl %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    movl 96(%esp,%ebx), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shll %cl, %ebx -; X86-NEXT:    orl %ebx, %eax +; X86-NEXT:    andl 12(%ecx), %esi +; X86-NEXT:    orl %eax, %esi +; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl (%esp), %eax # 4-byte Reload +; X86-NEXT:    movl 32(%esp,%eax), %edx +; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill +; X86-NEXT:    movl 12(%ebp), %ecx +; X86-NEXT:    shll %cl, %edx +; X86-NEXT:    movl 8(%ebp), %eax +; X86-NEXT:    andl (%eax), %ebx +; X86-NEXT:    orl %edx, %ebx +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    shldl %cl, %ebx, %edi -; X86-NEXT:    orl %edi, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT:    # kill: def $cl killed $cl killed $ecx +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    movl (%esp), %esi # 4-byte Reload +; X86-NEXT:    shldl %cl, %esi, %eax  ; X86-NEXT:    movl 8(%ebp), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT:    andl 4(%ecx), %edx +; X86-NEXT:    orl %eax, %edx +; X86-NEXT:    movl 12(%ebp), %esi +; X86-NEXT:    movl %esi, %eax +; X86-NEXT:    andl $96, %eax +; X86-NEXT:    shrl $3, %eax +; X86-NEXT:    movl (%ecx,%eax), %eax +; X86-NEXT:    btl %esi, %eax +; X86-NEXT:    movl %ecx, %eax  ; X86-NEXT:    movl %edi, 8(%ecx) -; X86-NEXT:    movl %esi, 12(%ecx) -; X86-NEXT:    movl %eax, (%ecx) -; X86-NEXT:    movl %edx, 4(%ecx) -; X86-NEXT:    sete %al +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT:    movl %ecx, 12(%eax) +; X86-NEXT:    movl %ebx, (%eax) +; X86-NEXT:    movl %edx, 4(%eax) +; X86-NEXT:    setae %al  ; X86-NEXT:    leal -12(%ebp), %esp  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi @@ -1151,22 +723,20 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; SSE-NEXT:    testb $64, %cl  ; SSE-NEXT:    cmovneq %rsi, %r8  ; SSE-NEXT:    cmovneq %r9, %rsi +; SSE-NEXT:    notq %r8  ; SSE-NEXT:    cmovneq %rax, %rdx  ; SSE-NEXT:    cmovneq %r9, %rax -; SSE-NEXT:    movq (%rdi), %rcx -; SSE-NEXT:    movq 8(%rdi), %r9 -; SSE-NEXT:    movq %r9, %r10 -; SSE-NEXT:    andq %r8, %r10 -; SSE-NEXT:    notq %r8 -; SSE-NEXT:    movq %rcx, %r11 -; SSE-NEXT:    andq %rsi, %r11  ; SSE-NEXT:    notq %rsi -; 
SSE-NEXT:    andq %r9, %r8 +; SSE-NEXT:    andq 8(%rdi), %r8  ; SSE-NEXT:    orq %rdx, %r8 -; SSE-NEXT:    andq %rcx, %rsi +; SSE-NEXT:    andq (%rdi), %rsi  ; SSE-NEXT:    orq %rax, %rsi -; SSE-NEXT:    orq %r10, %r11 -; SSE-NEXT:    sete %al +; SSE-NEXT:    movl %ecx, %eax +; SSE-NEXT:    andl $96, %eax +; SSE-NEXT:    shrl $3, %eax +; SSE-NEXT:    movl (%rdi,%rax), %eax +; SSE-NEXT:    btl %ecx, %eax +; SSE-NEXT:    setae %al  ; SSE-NEXT:    movq %rsi, (%rdi)  ; SSE-NEXT:    movq %r8, 8(%rdi)  ; SSE-NEXT:    retq @@ -1174,63 +744,63 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; AVX2-LABEL: init_eq_i128:  ; AVX2:       # %bb.0:  ; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    movl $1, %esi -; AVX2-NEXT:    xorl %eax, %eax -; AVX2-NEXT:    shldq %cl, %rsi, %rax -; AVX2-NEXT:    xorl %r8d, %r8d +; AVX2-NEXT:    movl $1, %eax +; AVX2-NEXT:    xorl %esi, %esi +; AVX2-NEXT:    shldq %cl, %rax, %rsi  ; AVX2-NEXT:    movl %edx, %edx +; AVX2-NEXT:    xorl %r8d, %r8d +; AVX2-NEXT:    shldq %cl, %rdx, %r8  ; AVX2-NEXT:    xorl %r9d, %r9d -; AVX2-NEXT:    shldq %cl, %rdx, %r9 -; AVX2-NEXT:    shlxq %rcx, %rsi, %rsi +; AVX2-NEXT:    shlxq %rcx, %rax, %rax  ; AVX2-NEXT:    testb $64, %cl -; AVX2-NEXT:    cmovneq %rsi, %rax -; AVX2-NEXT:    cmovneq %r8, %rsi -; AVX2-NEXT:    shlxq %rcx, %rdx, %rcx -; AVX2-NEXT:    cmovneq %rcx, %r9 -; AVX2-NEXT:    cmovneq %r8, %rcx -; AVX2-NEXT:    movq (%rdi), %rdx -; AVX2-NEXT:    movq 8(%rdi), %r8 -; AVX2-NEXT:    andnq %r8, %rax, %r10 -; AVX2-NEXT:    andq %rax, %r8 -; AVX2-NEXT:    andnq %rdx, %rsi, %r11 -; AVX2-NEXT:    andq %rsi, %rdx -; AVX2-NEXT:    orq %r9, %r10 -; AVX2-NEXT:    orq %rcx, %r11 -; AVX2-NEXT:    orq %r8, %rdx -; AVX2-NEXT:    sete %al -; AVX2-NEXT:    movq %r11, (%rdi) -; AVX2-NEXT:    movq %r10, 8(%rdi) +; AVX2-NEXT:    cmovneq %rax, %rsi +; AVX2-NEXT:    cmovneq %r9, %rax +; AVX2-NEXT:    shlxq %rcx, %rdx, %rdx +; AVX2-NEXT:    cmovneq %rdx, %r8 +; AVX2-NEXT:    cmovneq %r9, %rdx +; AVX2-NEXT:    andnq 8(%rdi), %rsi, %rsi +; AVX2-NEXT:    orq %r8, %rsi +; AVX2-NEXT:    andnq (%rdi), %rax, %r8 +; AVX2-NEXT:    orq %rdx, %r8 +; AVX2-NEXT:    movl %ecx, %eax +; AVX2-NEXT:    andl $96, %eax +; AVX2-NEXT:    shrl $3, %eax +; AVX2-NEXT:    movl (%rdi,%rax), %eax +; AVX2-NEXT:    btl %ecx, %eax +; AVX2-NEXT:    setae %al +; AVX2-NEXT:    movq %r8, (%rdi) +; AVX2-NEXT:    movq %rsi, 8(%rdi)  ; AVX2-NEXT:    retq  ;  ; AVX512-LABEL: init_eq_i128:  ; AVX512:       # %bb.0:  ; AVX512-NEXT:    movl %esi, %ecx -; AVX512-NEXT:    xorl %eax, %eax -; AVX512-NEXT:    movl $1, %esi +; AVX512-NEXT:    movl $1, %eax +; AVX512-NEXT:    xorl %esi, %esi +; AVX512-NEXT:    shldq %cl, %rax, %rsi  ; AVX512-NEXT:    xorl %r8d, %r8d -; AVX512-NEXT:    shldq %cl, %rsi, %r8 -; AVX512-NEXT:    shlxq %rcx, %rsi, %rsi +; AVX512-NEXT:    shlxq %rcx, %rax, %rax  ; AVX512-NEXT:    movl %edx, %edx  ; AVX512-NEXT:    xorl %r9d, %r9d  ; AVX512-NEXT:    shldq %cl, %rdx, %r9  ; AVX512-NEXT:    testb $64, %cl -; AVX512-NEXT:    cmovneq %rsi, %r8  ; AVX512-NEXT:    cmovneq %rax, %rsi -; AVX512-NEXT:    shlxq %rcx, %rdx, %rcx -; AVX512-NEXT:    cmovneq %rcx, %r9 -; AVX512-NEXT:    cmovneq %rax, %rcx -; AVX512-NEXT:    movq (%rdi), %rax -; AVX512-NEXT:    movq 8(%rdi), %rdx -; AVX512-NEXT:    andnq %rdx, %r8, %r10 -; AVX512-NEXT:    andq %r8, %rdx -; AVX512-NEXT:    andnq %rax, %rsi, %r8 -; AVX512-NEXT:    andq %rsi, %rax -; AVX512-NEXT:    orq %r9, %r10 -; AVX512-NEXT:    orq %rcx, %r8 -; AVX512-NEXT:    orq %rdx, %rax -; AVX512-NEXT:    sete 
%al +; AVX512-NEXT:    cmovneq %r8, %rax +; AVX512-NEXT:    shlxq %rcx, %rdx, %rdx +; AVX512-NEXT:    cmovneq %rdx, %r9 +; AVX512-NEXT:    cmovneq %r8, %rdx +; AVX512-NEXT:    andnq 8(%rdi), %rsi, %rsi +; AVX512-NEXT:    orq %r9, %rsi +; AVX512-NEXT:    andnq (%rdi), %rax, %r8 +; AVX512-NEXT:    orq %rdx, %r8 +; AVX512-NEXT:    movl %ecx, %eax +; AVX512-NEXT:    andl $96, %eax +; AVX512-NEXT:    shrl $3, %eax +; AVX512-NEXT:    movl (%rdi,%rax), %eax +; AVX512-NEXT:    btl %ecx, %eax +; AVX512-NEXT:    setae %al  ; AVX512-NEXT:    movq %r8, (%rdi) -; AVX512-NEXT:    movq %r10, 8(%rdi) +; AVX512-NEXT:    movq %rsi, 8(%rdi)  ; AVX512-NEXT:    retq    %rem = and i32 %position, 127    %ofs = zext nneg i32 %rem to i128 @@ -1252,344 +822,25 @@ define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {  define i1 @test_ne_i512(ptr %word, i32 %position) nounwind {  ; X86-LABEL: test_ne_i512:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $224, %esp -; X86-NEXT:    movl 12(%ebp), %ecx -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrl $3, %eax -; X86-NEXT:    andl $60, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx -; X86-NEXT:    subl %eax, %edx -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 24(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl $31, %ecx -; X86-NEXT:    shldl %cl, %esi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 56(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%edx), %eax -; X86-NEXT:    shldl %cl, %esi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X86-NEXT:    shldl %cl, %esi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%edx), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%edx), %edi -; X86-NEXT:    movl %edi, %ebx -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%edx), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %esi, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 52(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 4(%edx), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    shldl %cl, %edi, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl 8(%ebp), %ebx -; X86-NEXT:    andl 40(%ebx), %eax -; X86-NEXT:    andl 8(%ebx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 56(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 24(%ebx), %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    orl %esi, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %ebx, %edi -; X86-NEXT:    andl 44(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 12(%ebx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    movl %esi, %ebx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 60(%edi), %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 28(%edi), %eax -; X86-NEXT:    orl %esi, %eax -; X86-NEXT:    orl %ebx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%edx), %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shldl %cl, %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    negl %edx -; X86-NEXT:    movl 192(%esp,%edx), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    shldl %cl, %ebx, %edx -; 
X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shll %cl, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl 8(%ebp), %ebx -; X86-NEXT:    andl 32(%ebx), %ecx -; X86-NEXT:    andl (%ebx), %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    andl 16(%ebx), %edi -; X86-NEXT:    andl 48(%ebx), %edx -; X86-NEXT:    orl %edi, %edx -; X86-NEXT:    orl %esi, %edx -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 36(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 4(%ebx), %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 20(%ebx), %ecx -; X86-NEXT:    andl 52(%ebx), %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    orl %esi, %eax -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    setne %al -; X86-NEXT:    leal -12(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl %ecx, %edx +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    andl $60, %edx +; X86-NEXT:    movl (%eax,%edx), %eax +; X86-NEXT:    btl %ecx, %eax +; X86-NEXT:    setb %al  ; X86-NEXT:    retl  ; -; SSE-LABEL: test_ne_i512: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %r15 -; SSE-NEXT:    pushq %r14 -; SSE-NEXT:    pushq %rbx -; SSE-NEXT:    xorps %xmm0, %xmm0 -; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $1, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    andl $63, %ecx -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $56, %esi -; SSE-NEXT:    negl %esi -; SSE-NEXT:    movslq %esi, %rbx -; SSE-NEXT:    movq -48(%rsp,%rbx), %rdx -; SSE-NEXT:    movq -40(%rsp,%rbx), %r14 -; SSE-NEXT:    movq %r14, %rax -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq -16(%rsp,%rbx), %r11 -; SSE-NEXT:    movq -8(%rsp,%rbx), %r10 -; SSE-NEXT:    shldq %cl, %r11, %r10 -; SSE-NEXT:    movq -32(%rsp,%rbx), %r9 -; SSE-NEXT:    movq -24(%rsp,%rbx), %r15 -; SSE-NEXT:    movq %r15, %r8 -; SSE-NEXT:    shldq %cl, %r9, %r8 -; SSE-NEXT:    movq -56(%rsp,%rbx), %rsi -; SSE-NEXT:    shldq %cl, %rsi, %rdx -; SSE-NEXT:    shldq %cl, %r15, %r11 -; SSE-NEXT:    shldq %cl, %r14, %r9 -; SSE-NEXT:    movq -64(%rsp,%rbx), %rbx -; SSE-NEXT:    shldq %cl, %rbx, %rsi -; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shlq %cl, %rbx -; SSE-NEXT:    andq 32(%rdi), %r9 -; SSE-NEXT:    andq 48(%rdi), %r11 -; SSE-NEXT:    andq 16(%rdi), %rdx -; SSE-NEXT:    orq %r11, %rdx -; SSE-NEXT:    andq 40(%rdi), %r8 -; SSE-NEXT:    andq 56(%rdi), %r10 -; SSE-NEXT:    andq 24(%rdi), %rax -; SSE-NEXT:    orq %r10, %rax -; SSE-NEXT:    andq (%rdi), %rbx -; SSE-NEXT:    orq %r9, %rbx -; SSE-NEXT:    orq %rdx, %rbx -; SSE-NEXT:    andq 8(%rdi), %rsi -; SSE-NEXT:    orq %r8, %rsi -; SSE-NEXT:    orq %rax, %rsi -; SSE-NEXT:    orq %rbx, %rsi -; SSE-NEXT:    setne %al -; 
SSE-NEXT:    popq %rbx -; SSE-NEXT:    popq %r14 -; SSE-NEXT:    popq %r15 -; SSE-NEXT:    retq -; -; AVX2-LABEL: test_ne_i512: -; AVX2:       # %bb.0: -; AVX2-NEXT:    pushq %r15 -; AVX2-NEXT:    pushq %r14 -; AVX2-NEXT:    pushq %rbx -; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [1,0,0,0] -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    andl $63, %ecx -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    andl $56, %esi -; AVX2-NEXT:    negl %esi -; AVX2-NEXT:    movslq %esi, %rsi -; AVX2-NEXT:    movq -48(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq -40(%rsp,%rsi), %rbx -; AVX2-NEXT:    movq %rbx, %rax -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq -16(%rsp,%rsi), %r11 -; AVX2-NEXT:    movq -8(%rsp,%rsi), %r10 -; AVX2-NEXT:    shldq %cl, %r11, %r10 -; AVX2-NEXT:    movq -32(%rsp,%rsi), %r9 -; AVX2-NEXT:    movq -24(%rsp,%rsi), %r14 -; AVX2-NEXT:    movq %r14, %r8 -; AVX2-NEXT:    shldq %cl, %r9, %r8 -; AVX2-NEXT:    movq -64(%rsp,%rsi), %r15 -; AVX2-NEXT:    movq -56(%rsp,%rsi), %rsi -; AVX2-NEXT:    shldq %cl, %rsi, %rdx -; AVX2-NEXT:    shldq %cl, %r14, %r11 -; AVX2-NEXT:    shldq %cl, %rbx, %r9 -; AVX2-NEXT:    shldq %cl, %r15, %rsi -; AVX2-NEXT:    shlxq %rcx, %r15, %rcx -; AVX2-NEXT:    andq 32(%rdi), %r9 -; AVX2-NEXT:    andq 48(%rdi), %r11 -; AVX2-NEXT:    andq 16(%rdi), %rdx -; AVX2-NEXT:    andq 40(%rdi), %r8 -; AVX2-NEXT:    andq 56(%rdi), %r10 -; AVX2-NEXT:    andq 24(%rdi), %rax -; AVX2-NEXT:    orq %r11, %rdx -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    andq (%rdi), %rcx -; AVX2-NEXT:    orq %r9, %rcx -; AVX2-NEXT:    orq %rdx, %rcx -; AVX2-NEXT:    andq 8(%rdi), %rsi -; AVX2-NEXT:    orq %r8, %rsi -; AVX2-NEXT:    orq %rax, %rsi -; AVX2-NEXT:    orq %rcx, %rsi -; AVX2-NEXT:    setne %al -; AVX2-NEXT:    popq %rbx -; AVX2-NEXT:    popq %r14 -; AVX2-NEXT:    popq %r15 -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: test_ne_i512: -; AVX512:       # %bb.0: -; AVX512-NEXT:    pushq %r15 -; AVX512-NEXT:    pushq %r14 -; AVX512-NEXT:    pushq %rbx -; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [1,0,0,0] -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    movl %esi, %ecx -; AVX512-NEXT:    andl $63, %ecx -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    andl $56, %esi -; AVX512-NEXT:    negl %esi -; AVX512-NEXT:    movslq %esi, %rbx -; AVX512-NEXT:    movq -48(%rsp,%rbx), %rdx -; AVX512-NEXT:    movq -40(%rsp,%rbx), %r14 -; AVX512-NEXT:    movq %r14, %rax -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq -16(%rsp,%rbx), %r11 -; AVX512-NEXT:    movq -8(%rsp,%rbx), %r10 -; AVX512-NEXT:    shldq %cl, %r11, %r10 -; AVX512-NEXT:    movq -32(%rsp,%rbx), %r9 -; AVX512-NEXT:    movq -24(%rsp,%rbx), %r15 -; AVX512-NEXT:    movq %r15, %r8 -; AVX512-NEXT:    shldq %cl, %r9, %r8 -; AVX512-NEXT:    movq -56(%rsp,%rbx), %rsi -; AVX512-NEXT:    shldq %cl, %rsi, %rdx -; AVX512-NEXT:    shldq %cl, %r15, %r11 -; AVX512-NEXT:    shldq %cl, %r14, %r9 -; AVX512-NEXT:    movq -64(%rsp,%rbx), %rbx -; AVX512-NEXT:    shldq %cl, %rbx, %rsi -; AVX512-NEXT:    shlxq %rcx, %rbx, %rcx -; AVX512-NEXT:    andq 32(%rdi), 
%r9 -; AVX512-NEXT:    andq 48(%rdi), %r11 -; AVX512-NEXT:    andq 16(%rdi), %rdx -; AVX512-NEXT:    andq 40(%rdi), %r8 -; AVX512-NEXT:    andq 56(%rdi), %r10 -; AVX512-NEXT:    andq 24(%rdi), %rax -; AVX512-NEXT:    orq %r11, %rdx -; AVX512-NEXT:    orq %r10, %rax -; AVX512-NEXT:    andq (%rdi), %rcx -; AVX512-NEXT:    orq %r9, %rcx -; AVX512-NEXT:    orq %rdx, %rcx -; AVX512-NEXT:    andq 8(%rdi), %rsi -; AVX512-NEXT:    orq %r8, %rsi -; AVX512-NEXT:    orq %rax, %rsi -; AVX512-NEXT:    orq %rcx, %rsi -; AVX512-NEXT:    setne %al -; AVX512-NEXT:    popq %rbx -; AVX512-NEXT:    popq %r14 -; AVX512-NEXT:    popq %r15 -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: test_ne_i512: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %eax +; X64-NEXT:    shrl $3, %eax +; X64-NEXT:    andl $60, %eax +; X64-NEXT:    movl (%rdi,%rax), %eax +; X64-NEXT:    btl %esi, %eax +; X64-NEXT:    setb %al +; X64-NEXT:    retq    %rem = and i32 %position, 511    %ofs = zext nneg i32 %rem to i512    %bit = shl nuw i512 1, %ofs @@ -1602,572 +853,33 @@ define i1 @test_ne_i512(ptr %word, i32 %position) nounwind {  define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind {  ; X86-LABEL: complement_ne_i512:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $272, %esp # imm = 0x110 -; X86-NEXT:    movl 12(%ebp), %ecx -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrl $3, %eax -; X86-NEXT:    andl $60, %eax -; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill -; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx -; X86-NEXT:    subl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 24(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl $31, %ecx -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 56(%edx), %eax -; X86-NEXT:    movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%edx), %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%edx), %ebx -; X86-NEXT:    movl %ebx, %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%edx), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 52(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    shldl %cl, %esi, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%ebp), %edx -; X86-NEXT:    movl 40(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %eax -; X86-NEXT:    movl 8(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    movl %edx, %eax -; X86-NEXT:    movl 56(%edx), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edi, %ebx -; X86-NEXT:    movl 24(%edx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    orl %esi, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%eax), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl 12(%eax), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    orl %esi, %ebx -; X86-NEXT:    movl 60(%eax), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X86-NEXT:    movl %eax, %esi -; X86-NEXT:    movl %edx, %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl 28(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl (%eax), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%esp), %eax # 4-byte Reload -; X86-NEXT:    negl %eax -; X86-NEXT:    movl 240(%esp,%eax), %esi -; X86-NEXT:    shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shll %cl, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%ebp), %esi -; X86-NEXT:    movl 32(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edi, %eax -; X86-NEXT:    movl (%esi), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl 16(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %eax -; X86-NEXT:    movl 48(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl (%esp), %edx # 4-byte Folded Reload -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 36(%esi), %ebx -; X86-NEXT:    movl %ebx, %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl 4(%esi), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl %esi, %eax -; X86-NEXT:    movl 20(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl %esi, %edi -; X86-NEXT:    movl 52(%eax), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    orl %edi, %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Folded Reload -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    xorl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, (%esp) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl 8(%ebp), %edx -; X86-NEXT:    movl %ebx, 60(%edx) -; X86-NEXT:    movl %edi, 56(%edx) -; X86-NEXT:    movl %ecx, 52(%edx) -; X86-NEXT:    movl %esi, 44(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 40(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 36(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 32(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 28(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 24(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 20(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 16(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 12(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 8(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 4(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, (%edx) -; X86-NEXT:    movl (%esp), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 48(%edx) -; X86-NEXT:    setne %al -; X86-NEXT:    leal 
-12(%ebp), %esp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    andl $60, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setb %al +; X86-NEXT:    btcl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ; -; SSE-LABEL: complement_ne_i512: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %rbp -; SSE-NEXT:    pushq %r15 -; SSE-NEXT:    pushq %r14 -; SSE-NEXT:    pushq %r13 -; SSE-NEXT:    pushq %r12 -; SSE-NEXT:    pushq %rbx -; SSE-NEXT:    subq $56, %rsp -; SSE-NEXT:    xorps %xmm0, %xmm0 -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $1, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    andl $63, %ecx -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $56, %esi -; SSE-NEXT:    negl %esi -; SSE-NEXT:    movslq %esi, %rbx -; SSE-NEXT:    movq (%rsp,%rbx), %rsi -; SSE-NEXT:    movq 8(%rsp,%rbx), %r14 -; SSE-NEXT:    movq %r14, %rax -; SSE-NEXT:    shldq %cl, %rsi, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 32(%rsp,%rbx), %r8 -; SSE-NEXT:    movq 40(%rsp,%rbx), %rbp -; SSE-NEXT:    shldq %cl, %r8, %rbp -; SSE-NEXT:    movq 16(%rsp,%rbx), %r9 -; SSE-NEXT:    movq 24(%rsp,%rbx), %r15 -; SSE-NEXT:    movq %r15, %r10 -; SSE-NEXT:    shldq %cl, %r9, %r10 -; SSE-NEXT:    movq -8(%rsp,%rbx), %r11 -; SSE-NEXT:    shldq %cl, %r11, %rsi -; SSE-NEXT:    shldq %cl, %r15, %r8 -; SSE-NEXT:    shldq %cl, %r14, %r9 -; SSE-NEXT:    movq -16(%rsp,%rbx), %rbx -; SSE-NEXT:    shldq %cl, %rbx, %r11 -; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shlq %cl, %rbx -; SSE-NEXT:    movq 24(%rdi), %r15 -; SSE-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 56(%rdi), %rcx -; SSE-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 16(%rdi), %r12 -; SSE-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 48(%rdi), %r13 -; SSE-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %r8, %r13 -; SSE-NEXT:    andq %rsi, %r12 -; SSE-NEXT:    orq %r13, %r12 -; SSE-NEXT:    movq %rcx, %r13 -; SSE-NEXT:    andq %rbp, %r13 -; SSE-NEXT:    andq %rax, %r15 -; SSE-NEXT:    orq %r13, %r15 -; SSE-NEXT:    movq 32(%rdi), %r14 -; SSE-NEXT:    movq %r14, %rcx -; SSE-NEXT:    andq %r9, %rcx -; SSE-NEXT:    movq (%rdi), %r13 -; SSE-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rbx, %r13 -; SSE-NEXT:    orq %rcx, %r13 -; SSE-NEXT:    orq %r12, %r13 -; SSE-NEXT:    movq 40(%rdi), %rcx -; SSE-NEXT:    movq %rcx, %r12 -; SSE-NEXT:    andq %r10, %r12 -; SSE-NEXT:    movq 8(%rdi), %rdx -; SSE-NEXT:    movq %rdx, %rax -; SSE-NEXT:    andq %r11, %rax -; SSE-NEXT:    orq %r12, %rax -; SSE-NEXT:    orq %r15, %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; SSE-NEXT:    xorq 
{{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; SSE-NEXT:    xorq %rcx, %r10 -; SSE-NEXT:    xorq %r14, %r9 -; SSE-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; SSE-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; SSE-NEXT:    xorq %rdx, %r11 -; SSE-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; SSE-NEXT:    orq %r13, %rax -; SSE-NEXT:    movq %r8, 48(%rdi) -; SSE-NEXT:    movq %rbp, 56(%rdi) -; SSE-NEXT:    movq %r9, 32(%rdi) -; SSE-NEXT:    movq %r10, 40(%rdi) -; SSE-NEXT:    movq %rsi, 16(%rdi) -; SSE-NEXT:    movq %r15, 24(%rdi) -; SSE-NEXT:    movq %rbx, (%rdi) -; SSE-NEXT:    movq %r11, 8(%rdi) -; SSE-NEXT:    setne %al -; SSE-NEXT:    addq $56, %rsp -; SSE-NEXT:    popq %rbx -; SSE-NEXT:    popq %r12 -; SSE-NEXT:    popq %r13 -; SSE-NEXT:    popq %r14 -; SSE-NEXT:    popq %r15 -; SSE-NEXT:    popq %rbp -; SSE-NEXT:    retq -; -; AVX2-LABEL: complement_ne_i512: -; AVX2:       # %bb.0: -; AVX2-NEXT:    pushq %rbp -; AVX2-NEXT:    pushq %r15 -; AVX2-NEXT:    pushq %r14 -; AVX2-NEXT:    pushq %r13 -; AVX2-NEXT:    pushq %r12 -; AVX2-NEXT:    pushq %rbx -; AVX2-NEXT:    subq $72, %rsp -; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [1,0,0,0] -; AVX2-NEXT:    vmovups %ymm0, (%rsp) -; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    andl $63, %ecx -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    andl $56, %esi -; AVX2-NEXT:    negl %esi -; AVX2-NEXT:    movslq %esi, %rbx -; AVX2-NEXT:    movq 16(%rsp,%rbx), %rsi -; AVX2-NEXT:    movq 24(%rsp,%rbx), %rbp -; AVX2-NEXT:    movq %rbp, %rax -; AVX2-NEXT:    shldq %cl, %rsi, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 48(%rsp,%rbx), %r8 -; AVX2-NEXT:    movq 56(%rsp,%rbx), %r13 -; AVX2-NEXT:    shldq %cl, %r8, %r13 -; AVX2-NEXT:    movq 32(%rsp,%rbx), %r9 -; AVX2-NEXT:    movq 40(%rsp,%rbx), %r14 -; AVX2-NEXT:    movq %r14, %r10 -; AVX2-NEXT:    shldq %cl, %r9, %r10 -; AVX2-NEXT:    movq 8(%rsp,%rbx), %r11 -; AVX2-NEXT:    shldq %cl, %r11, %rsi -; AVX2-NEXT:    shldq %cl, %r14, %r8 -; AVX2-NEXT:    movq 16(%rdi), %r12 -; AVX2-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 48(%rdi), %r14 -; AVX2-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r8, %r14 -; AVX2-NEXT:    andq %rsi, %r12 -; AVX2-NEXT:    orq %r14, %r12 -; AVX2-NEXT:    movq 56(%rdi), %r15 -; AVX2-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r13, %r15 -; AVX2-NEXT:    movq 24(%rdi), %r14 -; AVX2-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %rax, %r14 -; AVX2-NEXT:    orq %r15, %r14 -; AVX2-NEXT:    shldq %cl, %rbp, %r9 -; AVX2-NEXT:    movq (%rsp,%rbx), %rdx -; AVX2-NEXT:    movq 32(%rdi), %r15 -; AVX2-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r9, %r15 -; AVX2-NEXT:    shlxq %rcx, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq (%rdi), %rbx -; AVX2-NEXT:    movq %rbx, %rbp -; AVX2-NEXT:    andq %rax, %rbp -; AVX2-NEXT:    orq %r15, %rbp -; AVX2-NEXT:    orq %r12, %rbp -; AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx -; AVX2-NEXT:    shldq %cl, %rdx, %r11 -; AVX2-NEXT:    movq 40(%rdi), %rax -; AVX2-NEXT:    movq %rax, %rcx -; AVX2-NEXT:  
  andq %r10, %rcx -; AVX2-NEXT:    movq 8(%rdi), %r15 -; AVX2-NEXT:    movq %r15, %r12 -; AVX2-NEXT:    andq %r11, %r12 -; AVX2-NEXT:    orq %rcx, %r12 -; AVX2-NEXT:    orq %r14, %r12 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX2-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; AVX2-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; AVX2-NEXT:    xorq %rax, %r10 -; AVX2-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; AVX2-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; AVX2-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; AVX2-NEXT:    xorq %r15, %r11 -; AVX2-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; AVX2-NEXT:    orq %rbp, %r12 -; AVX2-NEXT:    movq %r8, 48(%rdi) -; AVX2-NEXT:    movq %r13, 56(%rdi) -; AVX2-NEXT:    movq %r9, 32(%rdi) -; AVX2-NEXT:    movq %r10, 40(%rdi) -; AVX2-NEXT:    movq %rsi, 16(%rdi) -; AVX2-NEXT:    movq %rcx, 24(%rdi) -; AVX2-NEXT:    movq %rbx, (%rdi) -; AVX2-NEXT:    movq %r11, 8(%rdi) -; AVX2-NEXT:    setne %al -; AVX2-NEXT:    addq $72, %rsp -; AVX2-NEXT:    popq %rbx -; AVX2-NEXT:    popq %r12 -; AVX2-NEXT:    popq %r13 -; AVX2-NEXT:    popq %r14 -; AVX2-NEXT:    popq %r15 -; AVX2-NEXT:    popq %rbp -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: complement_ne_i512: -; AVX512:       # %bb.0: -; AVX512-NEXT:    pushq %rbp -; AVX512-NEXT:    pushq %r15 -; AVX512-NEXT:    pushq %r14 -; AVX512-NEXT:    pushq %r13 -; AVX512-NEXT:    pushq %r12 -; AVX512-NEXT:    pushq %rbx -; AVX512-NEXT:    subq $72, %rsp -; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [1,0,0,0] -; AVX512-NEXT:    vmovups %ymm0, (%rsp) -; AVX512-NEXT:    movl %esi, %ecx -; AVX512-NEXT:    andl $63, %ecx -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    andl $56, %esi -; AVX512-NEXT:    negl %esi -; AVX512-NEXT:    movslq %esi, %rbx -; AVX512-NEXT:    movq 16(%rsp,%rbx), %rsi -; AVX512-NEXT:    movq 24(%rsp,%rbx), %rbp -; AVX512-NEXT:    movq %rbp, %rax -; AVX512-NEXT:    shldq %cl, %rsi, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 48(%rsp,%rbx), %r8 -; AVX512-NEXT:    movq 56(%rsp,%rbx), %r13 -; AVX512-NEXT:    shldq %cl, %r8, %r13 -; AVX512-NEXT:    movq 32(%rsp,%rbx), %r9 -; AVX512-NEXT:    movq 40(%rsp,%rbx), %r14 -; AVX512-NEXT:    movq %r14, %r10 -; AVX512-NEXT:    shldq %cl, %r9, %r10 -; AVX512-NEXT:    movq 8(%rsp,%rbx), %r11 -; AVX512-NEXT:    shldq %cl, %r11, %rsi -; AVX512-NEXT:    shldq %cl, %r14, %r8 -; AVX512-NEXT:    movq 16(%rdi), %r12 -; AVX512-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 48(%rdi), %r14 -; AVX512-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r8, %r14 -; AVX512-NEXT:    andq %rsi, %r12 -; AVX512-NEXT:    orq %r14, %r12 -; AVX512-NEXT:    movq 56(%rdi), %r15 -; AVX512-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r13, %r15 -; AVX512-NEXT:    movq 24(%rdi), %r14 -; AVX512-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %rax, %r14 -; AVX512-NEXT:    orq %r15, %r14 -; AVX512-NEXT:    shldq %cl, %rbp, %r9 -; AVX512-NEXT:    movq (%rsp,%rbx), %rdx -; AVX512-NEXT:    movq 32(%rdi), %r15 
-; AVX512-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r9, %r15 -; AVX512-NEXT:    shlxq %rcx, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq (%rdi), %rbx -; AVX512-NEXT:    movq %rbx, %rbp -; AVX512-NEXT:    andq %rax, %rbp -; AVX512-NEXT:    orq %r15, %rbp -; AVX512-NEXT:    orq %r12, %rbp -; AVX512-NEXT:    # kill: def $cl killed $cl killed $rcx -; AVX512-NEXT:    shldq %cl, %rdx, %r11 -; AVX512-NEXT:    movq 40(%rdi), %rax -; AVX512-NEXT:    movq %rax, %rcx -; AVX512-NEXT:    andq %r10, %rcx -; AVX512-NEXT:    movq 8(%rdi), %r15 -; AVX512-NEXT:    movq %r15, %r12 -; AVX512-NEXT:    andq %r11, %r12 -; AVX512-NEXT:    orq %rcx, %r12 -; AVX512-NEXT:    orq %r14, %r12 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX512-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; AVX512-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; AVX512-NEXT:    xorq %rax, %r10 -; AVX512-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; AVX512-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; AVX512-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; AVX512-NEXT:    xorq %r15, %r11 -; AVX512-NEXT:    xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; AVX512-NEXT:    orq %rbp, %r12 -; AVX512-NEXT:    movq %r8, 48(%rdi) -; AVX512-NEXT:    movq %r13, 56(%rdi) -; AVX512-NEXT:    movq %r9, 32(%rdi) -; AVX512-NEXT:    movq %r10, 40(%rdi) -; AVX512-NEXT:    movq %rsi, 16(%rdi) -; AVX512-NEXT:    movq %rcx, 24(%rdi) -; AVX512-NEXT:    movq %rbx, (%rdi) -; AVX512-NEXT:    movq %r11, 8(%rdi) -; AVX512-NEXT:    setne %al -; AVX512-NEXT:    addq $72, %rsp -; AVX512-NEXT:    popq %rbx -; AVX512-NEXT:    popq %r12 -; AVX512-NEXT:    popq %r13 -; AVX512-NEXT:    popq %r14 -; AVX512-NEXT:    popq %r15 -; AVX512-NEXT:    popq %rbp -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: complement_ne_i512: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %ecx +; X64-NEXT:    shrl $3, %ecx +; X64-NEXT:    andl $60, %ecx +; X64-NEXT:    movl (%rdi,%rcx), %edx +; X64-NEXT:    btl %esi, %edx +; X64-NEXT:    setb %al +; X64-NEXT:    btcl %esi, %edx +; X64-NEXT:    movl %edx, (%rdi,%rcx) +; X64-NEXT:    retq    %rem = and i32 %position, 511    %ofs = zext nneg i32 %rem to i512    %bit = shl nuw i512 1, %ofs @@ -2182,606 +894,33 @@ define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind {  define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind {  ; X86-LABEL: reset_eq_i512:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $288, %esp # imm = 0x120 -; X86-NEXT:    movl 12(%ebp), %ecx -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrl $3, %eax -; X86-NEXT:    andl $60, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi -; X86-NEXT:    subl %eax, %edi -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, 
{{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 4(%edi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%edi), %eax -; X86-NEXT:    andl $31, %ecx -; X86-NEXT:    movl %eax, %ebx -; X86-NEXT:    shldl %cl, %edx, %ebx -; X86-NEXT:    movl 12(%edi), %edx -; X86-NEXT:    movl %edx, %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%edi), %eax -; X86-NEXT:    movl %eax, %esi -; X86-NEXT:    shldl %cl, %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%edi), %edx -; X86-NEXT:    movl %edx, %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%edi), %eax -; X86-NEXT:    movl %eax, %esi -; X86-NEXT:    shldl %cl, %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%edi), %edx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx  ; X86-NEXT:    movl %edx, %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%edi), %eax -; X86-NEXT:    movl %eax, %esi -; X86-NEXT:    shldl %cl, %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%edi), %esi -; X86-NEXT:    movl %esi, %edx -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%edi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %esi, %edx -; X86-NEXT:    movl 8(%ebp), %esi -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %eax, %edx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %eax, %ebx -; X86-NEXT:    orl %edx, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%edi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl 52(%edi), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 
56(%edi), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shldl %cl, %esi, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%ebp), %esi -; X86-NEXT:    movl 56(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %eax, %ebx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%esi), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %eax -; X86-NEXT:    orl %ebx, %eax -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %esi, %ebx -; X86-NEXT:    movl 44(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %eax, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%esi), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %eax -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%edi), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    shldl %cl, %esi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%ebx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%ebx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%edi), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    negl %eax -; X86-NEXT:    movl 256(%esp,%eax), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl %esi, %edi -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shll %cl, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %ebx, %esi -; X86-NEXT:    movl 32(%ebx), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %edx -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%ebx), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%esi), %ecx -; X86-NEXT: 
   movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %ebx -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%esi), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %eax -; X86-NEXT:    orl %ebx, %eax -; X86-NEXT:    orl %edi, %eax -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shldl %cl, %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%esi), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %edx -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%esi), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    shldl %cl, %edi, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%esi), %edi -; X86-NEXT:    andl %edi, %ecx -; X86-NEXT:    movl %ecx, %esi -; X86-NEXT:    movl %edx, %ecx -; X86-NEXT:    movl 8(%ebp), %ebx -; X86-NEXT:    movl 52(%ebx), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %edx -; X86-NEXT:    orl %esi, %edx -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    notl %ebx -; X86-NEXT:    andl %edi, %ebx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; 
X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    notl %esi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    notl %edi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    notl %edi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    notl %edi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    notl %ecx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl %edx, 60(%eax) -; X86-NEXT:    movl %esi, 56(%eax) -; X86-NEXT:    movl %ecx, 52(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 44(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 40(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 36(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 32(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 28(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 24(%eax) -; X86-NEXT:    movl %ebx, 20(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 16(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 12(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 8(%eax) -; X86-NEXT:    movl %edi, 4(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, (%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, 48(%eax) -; X86-NEXT:    sete %al -; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    andl $60, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; 
X86-NEXT:    setae %al +; X86-NEXT:    btrl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ; -; SSE-LABEL: reset_eq_i512: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %rbp -; SSE-NEXT:    pushq %r15 -; SSE-NEXT:    pushq %r14 -; SSE-NEXT:    pushq %r13 -; SSE-NEXT:    pushq %r12 -; SSE-NEXT:    pushq %rbx -; SSE-NEXT:    subq $56, %rsp -; SSE-NEXT:    xorps %xmm0, %xmm0 -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $1, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    andl $63, %ecx -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $56, %esi -; SSE-NEXT:    negl %esi -; SSE-NEXT:    movslq %esi, %rdx -; SSE-NEXT:    movq (%rsp,%rdx), %r9 -; SSE-NEXT:    movq 8(%rsp,%rdx), %r8 -; SSE-NEXT:    movq %r8, %rsi -; SSE-NEXT:    shldq %cl, %r9, %rsi -; SSE-NEXT:    movq -8(%rsp,%rdx), %rax -; SSE-NEXT:    shldq %cl, %rax, %r9 -; SSE-NEXT:    movq 16(%rsp,%rdx), %r14 -; SSE-NEXT:    movq 24(%rsp,%rdx), %r10 -; SSE-NEXT:    movq %r10, %rbx -; SSE-NEXT:    shldq %cl, %r14, %rbx -; SSE-NEXT:    shldq %cl, %r8, %r14 -; SSE-NEXT:    movq 32(%rsp,%rdx), %r13 -; SSE-NEXT:    movq 40(%rsp,%rdx), %r12 -; SSE-NEXT:    shldq %cl, %r13, %r12 -; SSE-NEXT:    shldq %cl, %r10, %r13 -; SSE-NEXT:    movq -16(%rsp,%rdx), %rdx -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shlq %cl, %rdx -; SSE-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq %r12, %rbp -; SSE-NEXT:    movq %r9, %r15 -; SSE-NEXT:    movq %rsi, %r11 -; SSE-NEXT:    movq 16(%rdi), %r8 -; SSE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 48(%rdi), %rcx -; SSE-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rcx, %r13 -; SSE-NEXT:    andq %r8, %r9 -; SSE-NEXT:    orq %r13, %r9 -; SSE-NEXT:    movq 56(%rdi), %rcx -; SSE-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rcx, %r12 -; SSE-NEXT:    movq 24(%rdi), %r10 -; SSE-NEXT:    andq %r10, %rsi -; SSE-NEXT:    orq %r12, %rsi -; SSE-NEXT:    movq %r14, %r13 -; SSE-NEXT:    movq 32(%rdi), %rcx -; SSE-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rcx, %r14 -; SSE-NEXT:    movq %rdx, %r12 -; SSE-NEXT:    movq (%rdi), %rcx -; SSE-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rcx, %rdx -; SSE-NEXT:    orq %r14, %rdx -; SSE-NEXT:    orq %r9, %rdx -; SSE-NEXT:    movq %rbx, %r14 -; SSE-NEXT:    movq 40(%rdi), %rcx -; SSE-NEXT:    andq %rcx, %rbx -; SSE-NEXT:    movq %rax, %r9 -; SSE-NEXT:    movq 8(%rdi), %r8 -; SSE-NEXT:    andq %r8, %rax -; SSE-NEXT:    orq %rbx, %rax -; SSE-NEXT:    orq %rsi, %rax -; SSE-NEXT:    notq %r11 -; SSE-NEXT:    andq %r10, %r11 -; SSE-NEXT:    notq %r15 -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; SSE-NEXT:    notq %r14 -; SSE-NEXT:    andq %rcx, %r14 -; SSE-NEXT:    notq %r13 -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; SSE-NEXT:    notq %rbp -; SSE-NEXT:    andq 
{{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; SSE-NEXT:    notq %rcx -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; SSE-NEXT:    notq %r9 -; SSE-NEXT:    andq %r8, %r9 -; SSE-NEXT:    notq %r12 -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    movq %rcx, 48(%rdi) -; SSE-NEXT:    movq %rbp, 56(%rdi) -; SSE-NEXT:    movq %r13, 32(%rdi) -; SSE-NEXT:    movq %r14, 40(%rdi) -; SSE-NEXT:    movq %r15, 16(%rdi) -; SSE-NEXT:    movq %r11, 24(%rdi) -; SSE-NEXT:    movq %r12, (%rdi) -; SSE-NEXT:    movq %r9, 8(%rdi) -; SSE-NEXT:    sete %al -; SSE-NEXT:    addq $56, %rsp -; SSE-NEXT:    popq %rbx -; SSE-NEXT:    popq %r12 -; SSE-NEXT:    popq %r13 -; SSE-NEXT:    popq %r14 -; SSE-NEXT:    popq %r15 -; SSE-NEXT:    popq %rbp -; SSE-NEXT:    retq -; -; AVX2-LABEL: reset_eq_i512: -; AVX2:       # %bb.0: -; AVX2-NEXT:    pushq %rbp -; AVX2-NEXT:    pushq %r15 -; AVX2-NEXT:    pushq %r14 -; AVX2-NEXT:    pushq %r13 -; AVX2-NEXT:    pushq %r12 -; AVX2-NEXT:    pushq %rbx -; AVX2-NEXT:    pushq %rax -; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [1,0,0,0] -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    andl $63, %ecx -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    andl $56, %esi -; AVX2-NEXT:    negl %esi -; AVX2-NEXT:    movslq %esi, %rdx -; AVX2-NEXT:    movq -48(%rsp,%rdx), %r8 -; AVX2-NEXT:    movq -40(%rsp,%rdx), %rbx -; AVX2-NEXT:    movq %rbx, %rax -; AVX2-NEXT:    shldq %cl, %r8, %rax -; AVX2-NEXT:    movq -16(%rsp,%rdx), %r10 -; AVX2-NEXT:    movq -8(%rsp,%rdx), %rsi -; AVX2-NEXT:    shldq %cl, %r10, %rsi -; AVX2-NEXT:    movq -32(%rsp,%rdx), %r11 -; AVX2-NEXT:    movq -24(%rsp,%rdx), %r14 -; AVX2-NEXT:    movq %r14, %r9 -; AVX2-NEXT:    shldq %cl, %r11, %r9 -; AVX2-NEXT:    movq -64(%rsp,%rdx), %r15 -; AVX2-NEXT:    movq -56(%rsp,%rdx), %rdx -; AVX2-NEXT:    shldq %cl, %rdx, %r8 -; AVX2-NEXT:    shldq %cl, %r14, %r10 -; AVX2-NEXT:    shldq %cl, %rbx, %r11 -; AVX2-NEXT:    shldq %cl, %r15, %rdx -; AVX2-NEXT:    shlxq %rcx, %r15, %rcx -; AVX2-NEXT:    movq 24(%rdi), %rbx -; AVX2-NEXT:    movq 56(%rdi), %r14 -; AVX2-NEXT:    movq 16(%rdi), %r15 -; AVX2-NEXT:    movq 48(%rdi), %r13 -; AVX2-NEXT:    movq 32(%rdi), %rbp -; AVX2-NEXT:    andnq %rbp, %r11, %r12 -; AVX2-NEXT:    andq %r11, %rbp -; AVX2-NEXT:    andnq %r13, %r10, %r11 -; AVX2-NEXT:    andq %r10, %r13 -; AVX2-NEXT:    andnq %r15, %r8, %r10 -; AVX2-NEXT:    andq %r8, %r15 -; AVX2-NEXT:    movq 40(%rdi), %r8 -; AVX2-NEXT:    orq %r13, %r15 -; AVX2-NEXT:    andnq %r8, %r9, %r13 -; AVX2-NEXT:    andq %r9, %r8 -; AVX2-NEXT:    andnq %r14, %rsi, %r9 -; AVX2-NEXT:    andq %rsi, %r14 -; AVX2-NEXT:    andnq %rbx, %rax, %rsi -; AVX2-NEXT:    andq %rax, %rbx -; AVX2-NEXT:    movq (%rdi), %rax -; AVX2-NEXT:    orq %r14, %rbx -; AVX2-NEXT:    andnq %rax, %rcx, %r14 -; AVX2-NEXT:    andq %rcx, %rax -; AVX2-NEXT:    orq %rbp, %rax -; AVX2-NEXT:    movq 8(%rdi), %rcx -; AVX2-NEXT:    orq %r15, %rax -; AVX2-NEXT:    andnq %rcx, %rdx, %r15 -; AVX2-NEXT:    andq %rdx, %rcx -; AVX2-NEXT:    orq %r8, %rcx -; AVX2-NEXT:    orq %rbx, %rcx -; AVX2-NEXT:    orq %rax, %rcx -; AVX2-NEXT:    movq %r11, 48(%rdi) -; AVX2-NEXT:    movq %r9, 56(%rdi) -; 
AVX2-NEXT:    movq %r12, 32(%rdi) -; AVX2-NEXT:    movq %r13, 40(%rdi) -; AVX2-NEXT:    movq %r10, 16(%rdi) -; AVX2-NEXT:    movq %rsi, 24(%rdi) -; AVX2-NEXT:    movq %r14, (%rdi) -; AVX2-NEXT:    movq %r15, 8(%rdi) -; AVX2-NEXT:    sete %al -; AVX2-NEXT:    addq $8, %rsp -; AVX2-NEXT:    popq %rbx -; AVX2-NEXT:    popq %r12 -; AVX2-NEXT:    popq %r13 -; AVX2-NEXT:    popq %r14 -; AVX2-NEXT:    popq %r15 -; AVX2-NEXT:    popq %rbp -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: reset_eq_i512: -; AVX512:       # %bb.0: -; AVX512-NEXT:    pushq %rbp -; AVX512-NEXT:    pushq %r15 -; AVX512-NEXT:    pushq %r14 -; AVX512-NEXT:    pushq %r13 -; AVX512-NEXT:    pushq %r12 -; AVX512-NEXT:    pushq %rbx -; AVX512-NEXT:    pushq %rax -; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [1,0,0,0] -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    movl %esi, %ecx -; AVX512-NEXT:    andl $63, %ecx -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    andl $56, %esi -; AVX512-NEXT:    negl %esi -; AVX512-NEXT:    movslq %esi, %rbx -; AVX512-NEXT:    movq -48(%rsp,%rbx), %r8 -; AVX512-NEXT:    movq -40(%rsp,%rbx), %r14 -; AVX512-NEXT:    movq %r14, %rax -; AVX512-NEXT:    shldq %cl, %r8, %rax -; AVX512-NEXT:    movq -16(%rsp,%rbx), %r10 -; AVX512-NEXT:    movq -8(%rsp,%rbx), %rsi -; AVX512-NEXT:    shldq %cl, %r10, %rsi -; AVX512-NEXT:    movq -32(%rsp,%rbx), %r11 -; AVX512-NEXT:    movq -24(%rsp,%rbx), %r15 -; AVX512-NEXT:    movq %r15, %r9 -; AVX512-NEXT:    shldq %cl, %r11, %r9 -; AVX512-NEXT:    movq -56(%rsp,%rbx), %rdx -; AVX512-NEXT:    shldq %cl, %rdx, %r8 -; AVX512-NEXT:    shldq %cl, %r15, %r10 -; AVX512-NEXT:    shldq %cl, %r14, %r11 -; AVX512-NEXT:    movq -64(%rsp,%rbx), %rbx -; AVX512-NEXT:    shldq %cl, %rbx, %rdx -; AVX512-NEXT:    shlxq %rcx, %rbx, %rcx -; AVX512-NEXT:    movq 24(%rdi), %rbx -; AVX512-NEXT:    movq 56(%rdi), %r14 -; AVX512-NEXT:    movq 16(%rdi), %r15 -; AVX512-NEXT:    movq 48(%rdi), %r13 -; AVX512-NEXT:    movq 32(%rdi), %rbp -; AVX512-NEXT:    andnq %rbp, %r11, %r12 -; AVX512-NEXT:    andq %r11, %rbp -; AVX512-NEXT:    andnq %r13, %r10, %r11 -; AVX512-NEXT:    andq %r10, %r13 -; AVX512-NEXT:    andnq %r15, %r8, %r10 -; AVX512-NEXT:    andq %r8, %r15 -; AVX512-NEXT:    movq 40(%rdi), %r8 -; AVX512-NEXT:    orq %r13, %r15 -; AVX512-NEXT:    andnq %r8, %r9, %r13 -; AVX512-NEXT:    andq %r9, %r8 -; AVX512-NEXT:    andnq %r14, %rsi, %r9 -; AVX512-NEXT:    andq %rsi, %r14 -; AVX512-NEXT:    andnq %rbx, %rax, %rsi -; AVX512-NEXT:    andq %rax, %rbx -; AVX512-NEXT:    movq (%rdi), %rax -; AVX512-NEXT:    orq %r14, %rbx -; AVX512-NEXT:    andnq %rax, %rcx, %r14 -; AVX512-NEXT:    andq %rcx, %rax -; AVX512-NEXT:    orq %rbp, %rax -; AVX512-NEXT:    movq 8(%rdi), %rcx -; AVX512-NEXT:    orq %r15, %rax -; AVX512-NEXT:    andnq %rcx, %rdx, %r15 -; AVX512-NEXT:    andq %rdx, %rcx -; AVX512-NEXT:    orq %r8, %rcx -; AVX512-NEXT:    orq %rbx, %rcx -; AVX512-NEXT:    orq %rax, %rcx -; AVX512-NEXT:    movq %r11, 48(%rdi) -; AVX512-NEXT:    movq %r9, 56(%rdi) -; AVX512-NEXT:    movq %r12, 32(%rdi) -; AVX512-NEXT:    movq %r13, 40(%rdi) -; AVX512-NEXT:    movq %r10, 16(%rdi) -; AVX512-NEXT:    movq %rsi, 24(%rdi) -; AVX512-NEXT:    movq %r14, (%rdi) -; AVX512-NEXT:    movq %r15, 8(%rdi) -; AVX512-NEXT:    sete %al -; AVX512-NEXT:    addq $8, %rsp -; 
AVX512-NEXT:    popq %rbx -; AVX512-NEXT:    popq %r12 -; AVX512-NEXT:    popq %r13 -; AVX512-NEXT:    popq %r14 -; AVX512-NEXT:    popq %r15 -; AVX512-NEXT:    popq %rbp -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: reset_eq_i512: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %ecx +; X64-NEXT:    shrl $3, %ecx +; X64-NEXT:    andl $60, %ecx +; X64-NEXT:    movl (%rdi,%rcx), %edx +; X64-NEXT:    btl %esi, %edx +; X64-NEXT:    setae %al +; X64-NEXT:    btrl %esi, %edx +; X64-NEXT:    movl %edx, (%rdi,%rcx) +; X64-NEXT:    retq    %rem = and i32 %position, 511    %ofs = zext nneg i32 %rem to i512    %bit = shl nuw i512 1, %ofs @@ -2797,572 +936,33 @@ define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind {  define i1 @set_ne_i512(ptr %word, i32 %position) nounwind {  ; X86-LABEL: set_ne_i512:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $272, %esp # imm = 0x110 -; X86-NEXT:    movl 12(%ebp), %ecx -; X86-NEXT:    movl %ecx, %eax -; X86-NEXT:    shrl $3, %eax -; X86-NEXT:    andl $60, %eax -; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill -; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx -; X86-NEXT:    subl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 24(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl $31, %ecx -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 56(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%edx), %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl 
%cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%edx), %ebx -; X86-NEXT:    movl %ebx, %esi -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%edx), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 52(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    shldl %cl, %esi, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%ebp), %edx -; X86-NEXT:    movl 40(%edx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %eax -; X86-NEXT:    movl 8(%edx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    movl %edx, %eax -; X86-NEXT:    movl 56(%edx), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edi, %ebx -; X86-NEXT:    movl 24(%edx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    orl %esi, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%eax), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl 12(%eax), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    orl %esi, %ebx -; X86-NEXT:    movl 60(%eax), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %eax, %esi -; X86-NEXT:    movl %edx, %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl 28(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl 
%edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl (%eax), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%esp), %eax # 4-byte Reload -; X86-NEXT:    negl %eax -; X86-NEXT:    movl 240(%esp,%eax), %esi -; X86-NEXT:    shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shll %cl, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%ebp), %esi -; X86-NEXT:    movl 32(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edi, %eax -; X86-NEXT:    movl (%esi), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl 16(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %eax -; X86-NEXT:    movl 48(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl (%esp), %edx # 4-byte Folded Reload -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 36(%esi), %ebx -; X86-NEXT:    movl %ebx, %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl 4(%esi), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl %esi, %eax -; X86-NEXT:    movl 20(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl %esi, %edi -; X86-NEXT:    movl 52(%eax), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    orl %edi, %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    orl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, (%esp) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl 8(%ebp), %edx -; X86-NEXT:    movl %ebx, 60(%edx) -; X86-NEXT:    movl %edi, 56(%edx) -; X86-NEXT:    movl %ecx, 52(%edx) -; X86-NEXT:    movl %esi, 44(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 40(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 36(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 32(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 28(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 24(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 20(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 16(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 12(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 8(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 4(%edx) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, (%edx) -; X86-NEXT:    movl (%esp), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, 48(%edx) -; X86-NEXT:    setne %al -; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    shrl $3, %esi +; X86-NEXT:    andl $60, %esi +; X86-NEXT:    movl (%ecx,%esi), %edi +; X86-NEXT:    btl %edx, %edi +; X86-NEXT:    setb %al +; X86-NEXT:    btsl %edx, %edi +; X86-NEXT:    movl %edi, (%ecx,%esi)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi 
-; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl  ; -; SSE-LABEL: set_ne_i512: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %rbp -; SSE-NEXT:    pushq %r15 -; SSE-NEXT:    pushq %r14 -; SSE-NEXT:    pushq %r13 -; SSE-NEXT:    pushq %r12 -; SSE-NEXT:    pushq %rbx -; SSE-NEXT:    subq $56, %rsp -; SSE-NEXT:    xorps %xmm0, %xmm0 -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $1, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    andl $63, %ecx -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $56, %esi -; SSE-NEXT:    negl %esi -; SSE-NEXT:    movslq %esi, %rbx -; SSE-NEXT:    movq (%rsp,%rbx), %rsi -; SSE-NEXT:    movq 8(%rsp,%rbx), %r14 -; SSE-NEXT:    movq %r14, %rax -; SSE-NEXT:    shldq %cl, %rsi, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 32(%rsp,%rbx), %r8 -; SSE-NEXT:    movq 40(%rsp,%rbx), %rbp -; SSE-NEXT:    shldq %cl, %r8, %rbp -; SSE-NEXT:    movq 16(%rsp,%rbx), %r9 -; SSE-NEXT:    movq 24(%rsp,%rbx), %r15 -; SSE-NEXT:    movq %r15, %r10 -; SSE-NEXT:    shldq %cl, %r9, %r10 -; SSE-NEXT:    movq -8(%rsp,%rbx), %r11 -; SSE-NEXT:    shldq %cl, %r11, %rsi -; SSE-NEXT:    shldq %cl, %r15, %r8 -; SSE-NEXT:    shldq %cl, %r14, %r9 -; SSE-NEXT:    movq -16(%rsp,%rbx), %rbx -; SSE-NEXT:    shldq %cl, %rbx, %r11 -; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shlq %cl, %rbx -; SSE-NEXT:    movq 24(%rdi), %r15 -; SSE-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 56(%rdi), %rcx -; SSE-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 16(%rdi), %r12 -; SSE-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 48(%rdi), %r13 -; SSE-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %r8, %r13 -; SSE-NEXT:    andq %rsi, %r12 -; SSE-NEXT:    orq %r13, %r12 -; SSE-NEXT:    movq %rcx, %r13 -; SSE-NEXT:    andq %rbp, %r13 -; SSE-NEXT:    andq %rax, %r15 -; SSE-NEXT:    orq %r13, %r15 -; SSE-NEXT:    movq 32(%rdi), %r14 -; SSE-NEXT:    movq %r14, %rcx -; SSE-NEXT:    andq %r9, %rcx -; SSE-NEXT:    movq (%rdi), %r13 -; SSE-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rbx, %r13 -; SSE-NEXT:    orq %rcx, %r13 -; SSE-NEXT:    orq %r12, %r13 -; SSE-NEXT:    movq 40(%rdi), %rcx -; SSE-NEXT:    movq %rcx, %r12 -; SSE-NEXT:    andq %r10, %r12 -; SSE-NEXT:    movq 8(%rdi), %rdx -; SSE-NEXT:    movq %rdx, %rax -; SSE-NEXT:    andq %r11, %rax -; SSE-NEXT:    orq %r12, %rax -; SSE-NEXT:    orq %r15, %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; SSE-NEXT:    orq %rcx, %r10 -; SSE-NEXT:    orq %r14, %r9 -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; SSE-NEXT:    orq %rdx, %r11 -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; SSE-NEXT:    orq %r13, %rax -; SSE-NEXT:    movq %r8, 48(%rdi) -; 
SSE-NEXT:    movq %rbp, 56(%rdi) -; SSE-NEXT:    movq %r9, 32(%rdi) -; SSE-NEXT:    movq %r10, 40(%rdi) -; SSE-NEXT:    movq %rsi, 16(%rdi) -; SSE-NEXT:    movq %r15, 24(%rdi) -; SSE-NEXT:    movq %rbx, (%rdi) -; SSE-NEXT:    movq %r11, 8(%rdi) -; SSE-NEXT:    setne %al -; SSE-NEXT:    addq $56, %rsp -; SSE-NEXT:    popq %rbx -; SSE-NEXT:    popq %r12 -; SSE-NEXT:    popq %r13 -; SSE-NEXT:    popq %r14 -; SSE-NEXT:    popq %r15 -; SSE-NEXT:    popq %rbp -; SSE-NEXT:    retq -; -; AVX2-LABEL: set_ne_i512: -; AVX2:       # %bb.0: -; AVX2-NEXT:    pushq %rbp -; AVX2-NEXT:    pushq %r15 -; AVX2-NEXT:    pushq %r14 -; AVX2-NEXT:    pushq %r13 -; AVX2-NEXT:    pushq %r12 -; AVX2-NEXT:    pushq %rbx -; AVX2-NEXT:    subq $72, %rsp -; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [1,0,0,0] -; AVX2-NEXT:    vmovups %ymm0, (%rsp) -; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    andl $63, %ecx -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    andl $56, %esi -; AVX2-NEXT:    negl %esi -; AVX2-NEXT:    movslq %esi, %rbx -; AVX2-NEXT:    movq 16(%rsp,%rbx), %rsi -; AVX2-NEXT:    movq 24(%rsp,%rbx), %rbp -; AVX2-NEXT:    movq %rbp, %rax -; AVX2-NEXT:    shldq %cl, %rsi, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 48(%rsp,%rbx), %r8 -; AVX2-NEXT:    movq 56(%rsp,%rbx), %r13 -; AVX2-NEXT:    shldq %cl, %r8, %r13 -; AVX2-NEXT:    movq 32(%rsp,%rbx), %r9 -; AVX2-NEXT:    movq 40(%rsp,%rbx), %r14 -; AVX2-NEXT:    movq %r14, %r10 -; AVX2-NEXT:    shldq %cl, %r9, %r10 -; AVX2-NEXT:    movq 8(%rsp,%rbx), %r11 -; AVX2-NEXT:    shldq %cl, %r11, %rsi -; AVX2-NEXT:    shldq %cl, %r14, %r8 -; AVX2-NEXT:    movq 16(%rdi), %r12 -; AVX2-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 48(%rdi), %r14 -; AVX2-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r8, %r14 -; AVX2-NEXT:    andq %rsi, %r12 -; AVX2-NEXT:    orq %r14, %r12 -; AVX2-NEXT:    movq 56(%rdi), %r15 -; AVX2-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r13, %r15 -; AVX2-NEXT:    movq 24(%rdi), %r14 -; AVX2-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %rax, %r14 -; AVX2-NEXT:    orq %r15, %r14 -; AVX2-NEXT:    shldq %cl, %rbp, %r9 -; AVX2-NEXT:    movq (%rsp,%rbx), %rdx -; AVX2-NEXT:    movq 32(%rdi), %r15 -; AVX2-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r9, %r15 -; AVX2-NEXT:    shlxq %rcx, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq (%rdi), %rbx -; AVX2-NEXT:    movq %rbx, %rbp -; AVX2-NEXT:    andq %rax, %rbp -; AVX2-NEXT:    orq %r15, %rbp -; AVX2-NEXT:    orq %r12, %rbp -; AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx -; AVX2-NEXT:    shldq %cl, %rdx, %r11 -; AVX2-NEXT:    movq 40(%rdi), %rax -; AVX2-NEXT:    movq %rax, %rcx -; AVX2-NEXT:    andq %r10, %rcx -; AVX2-NEXT:    movq 8(%rdi), %r15 -; AVX2-NEXT:    movq %r15, %r12 -; AVX2-NEXT:    andq %r11, %r12 -; AVX2-NEXT:    orq %rcx, %r12 -; AVX2-NEXT:    orq %r14, %r12 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX2-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; AVX2-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; AVX2-NEXT:    orq %rax, %r10 -; AVX2-NEXT: 
   orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; AVX2-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; AVX2-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; AVX2-NEXT:    orq %r15, %r11 -; AVX2-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; AVX2-NEXT:    orq %rbp, %r12 -; AVX2-NEXT:    movq %r8, 48(%rdi) -; AVX2-NEXT:    movq %r13, 56(%rdi) -; AVX2-NEXT:    movq %r9, 32(%rdi) -; AVX2-NEXT:    movq %r10, 40(%rdi) -; AVX2-NEXT:    movq %rsi, 16(%rdi) -; AVX2-NEXT:    movq %rcx, 24(%rdi) -; AVX2-NEXT:    movq %rbx, (%rdi) -; AVX2-NEXT:    movq %r11, 8(%rdi) -; AVX2-NEXT:    setne %al -; AVX2-NEXT:    addq $72, %rsp -; AVX2-NEXT:    popq %rbx -; AVX2-NEXT:    popq %r12 -; AVX2-NEXT:    popq %r13 -; AVX2-NEXT:    popq %r14 -; AVX2-NEXT:    popq %r15 -; AVX2-NEXT:    popq %rbp -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: set_ne_i512: -; AVX512:       # %bb.0: -; AVX512-NEXT:    pushq %rbp -; AVX512-NEXT:    pushq %r15 -; AVX512-NEXT:    pushq %r14 -; AVX512-NEXT:    pushq %r13 -; AVX512-NEXT:    pushq %r12 -; AVX512-NEXT:    pushq %rbx -; AVX512-NEXT:    subq $72, %rsp -; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [1,0,0,0] -; AVX512-NEXT:    vmovups %ymm0, (%rsp) -; AVX512-NEXT:    movl %esi, %ecx -; AVX512-NEXT:    andl $63, %ecx -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    andl $56, %esi -; AVX512-NEXT:    negl %esi -; AVX512-NEXT:    movslq %esi, %rbx -; AVX512-NEXT:    movq 16(%rsp,%rbx), %rsi -; AVX512-NEXT:    movq 24(%rsp,%rbx), %rbp -; AVX512-NEXT:    movq %rbp, %rax -; AVX512-NEXT:    shldq %cl, %rsi, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 48(%rsp,%rbx), %r8 -; AVX512-NEXT:    movq 56(%rsp,%rbx), %r13 -; AVX512-NEXT:    shldq %cl, %r8, %r13 -; AVX512-NEXT:    movq 32(%rsp,%rbx), %r9 -; AVX512-NEXT:    movq 40(%rsp,%rbx), %r14 -; AVX512-NEXT:    movq %r14, %r10 -; AVX512-NEXT:    shldq %cl, %r9, %r10 -; AVX512-NEXT:    movq 8(%rsp,%rbx), %r11 -; AVX512-NEXT:    shldq %cl, %r11, %rsi -; AVX512-NEXT:    shldq %cl, %r14, %r8 -; AVX512-NEXT:    movq 16(%rdi), %r12 -; AVX512-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 48(%rdi), %r14 -; AVX512-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r8, %r14 -; AVX512-NEXT:    andq %rsi, %r12 -; AVX512-NEXT:    orq %r14, %r12 -; AVX512-NEXT:    movq 56(%rdi), %r15 -; AVX512-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r13, %r15 -; AVX512-NEXT:    movq 24(%rdi), %r14 -; AVX512-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %rax, %r14 -; AVX512-NEXT:    orq %r15, %r14 -; AVX512-NEXT:    shldq %cl, %rbp, %r9 -; AVX512-NEXT:    movq (%rsp,%rbx), %rdx -; AVX512-NEXT:    movq 32(%rdi), %r15 -; AVX512-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r9, %r15 -; AVX512-NEXT:    shlxq %rcx, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq (%rdi), %rbx -; AVX512-NEXT:    movq %rbx, %rbp -; AVX512-NEXT:    andq %rax, %rbp -; AVX512-NEXT:    orq %r15, %rbp -; AVX512-NEXT:    orq %r12, %rbp -; AVX512-NEXT:    # kill: def $cl killed $cl killed $rcx -; 
AVX512-NEXT:    shldq %cl, %rdx, %r11 -; AVX512-NEXT:    movq 40(%rdi), %rax -; AVX512-NEXT:    movq %rax, %rcx -; AVX512-NEXT:    andq %r10, %rcx -; AVX512-NEXT:    movq 8(%rdi), %r15 -; AVX512-NEXT:    movq %r15, %r12 -; AVX512-NEXT:    andq %r11, %r12 -; AVX512-NEXT:    orq %rcx, %r12 -; AVX512-NEXT:    orq %r14, %r12 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX512-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; AVX512-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; AVX512-NEXT:    orq %rax, %r10 -; AVX512-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; AVX512-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; AVX512-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; AVX512-NEXT:    orq %r15, %r11 -; AVX512-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; AVX512-NEXT:    orq %rbp, %r12 -; AVX512-NEXT:    movq %r8, 48(%rdi) -; AVX512-NEXT:    movq %r13, 56(%rdi) -; AVX512-NEXT:    movq %r9, 32(%rdi) -; AVX512-NEXT:    movq %r10, 40(%rdi) -; AVX512-NEXT:    movq %rsi, 16(%rdi) -; AVX512-NEXT:    movq %rcx, 24(%rdi) -; AVX512-NEXT:    movq %rbx, (%rdi) -; AVX512-NEXT:    movq %r11, 8(%rdi) -; AVX512-NEXT:    setne %al -; AVX512-NEXT:    addq $72, %rsp -; AVX512-NEXT:    popq %rbx -; AVX512-NEXT:    popq %r12 -; AVX512-NEXT:    popq %r13 -; AVX512-NEXT:    popq %r14 -; AVX512-NEXT:    popq %r15 -; AVX512-NEXT:    popq %rbp -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: set_ne_i512: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %ecx +; X64-NEXT:    shrl $3, %ecx +; X64-NEXT:    andl $60, %ecx +; X64-NEXT:    movl (%rdi,%rcx), %edx +; X64-NEXT:    btl %esi, %edx +; X64-NEXT:    setb %al +; X64-NEXT:    btsl %esi, %edx +; X64-NEXT:    movl %edx, (%rdi,%rcx) +; X64-NEXT:    retq    %rem = and i32 %position, 511    %ofs = zext nneg i32 %rem to i512    %bit = shl nuw i512 1, %ofs @@ -3383,13 +983,14 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi  ; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $432, %esp # imm = 0x1B0 +; X86-NEXT:    subl $352, %esp # imm = 0x160  ; X86-NEXT:    movl 12(%ebp), %ecx  ; X86-NEXT:    movl %ecx, %edx  ; X86-NEXT:    shrl $3, %edx  ; X86-NEXT:    andl $60, %edx -; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi -; X86-NEXT:    subl %edx, %esi +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax +; X86-NEXT:    subl %edx, %eax  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) @@ -3422,60 +1023,58 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 56(%esi), %eax +; X86-NEXT:    movl 56(%eax), %esi +; X86-NEXT:    movl 60(%eax), %ebx +; X86-NEXT:    movl 52(%eax), %edi +; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 48(%eax), %edi +; X86-NEXT:    movl 44(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 40(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 36(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 
32(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 28(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 24(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 20(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 16(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 12(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 8(%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl (%eax), %edx +; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 4(%eax), %eax  ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%esi), %eax +; X86-NEXT:    movzbl 16(%ebp), %eax +; X86-NEXT:    movzbl %al, %eax +; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT:    andl $31, %ecx +; X86-NEXT:    shldl %cl, %esi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    shldl %cl, %eax, %esi +; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    shldl %cl, %edi, %eax  ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 52(%esi), %eax -; X86-NEXT:    movl 48(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%esi), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%esi), %edi +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    shldl %cl, %ebx, %edi  ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%esi), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movzbl 16(%ebp), %ebx -; X86-NEXT:    movzbl %bl, %esi -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi -; X86-NEXT:    subl %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl $31, %ecx  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT:    shldl %cl, %edx, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  
; X86-NEXT:    shldl %cl, %eax, %edx  ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    shldl %cl, %ebx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    shldl %cl, %esi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shldl %cl, %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    shldl %cl, %edx, %eax +; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X86-NEXT:    shldl %cl, %eax, %edx  ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -3500,9 +1099,12 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X86-NEXT:    shldl %cl, %eax, %esi  ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    movl %ebx, %edx -; X86-NEXT:    shldl %cl, %edi, %edx +; X86-NEXT:    shll %cl, %eax +; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax +; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) @@ -3534,273 +1136,148 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 8(%ebp), %ebx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%ebx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %edx +; X86-NEXT:    movl 56(%eax), %esi +; X86-NEXT:    movl 60(%eax), %edi +; X86-NEXT:    shldl %cl, %esi, %edi +; X86-NEXT:    movl 8(%ebp), %edx +; X86-NEXT:    andl 60(%edx), %ebx +; X86-NEXT:    orl %edi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 52(%eax), %edi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    andl 56(%edx), %ebx +; X86-NEXT:    orl %esi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 48(%eax), %esi +; X86-NEXT:    shldl %cl, %esi, %edi +; X86-NEXT:    andl 52(%edx), %ebx +; X86-NEXT:    orl %edi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 44(%eax), %edi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    andl 48(%edx), %ebx +; X86-NEXT:    orl %esi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 40(%eax), %esi +; X86-NEXT:    shldl %cl, %esi, %edi +; X86-NEXT:    andl 44(%edx), %ebx +; X86-NEXT:    orl %edi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 36(%eax), %edi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    andl 40(%edx), %ebx +; X86-NEXT:    orl %esi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 32(%eax), %esi +; X86-NEXT:    shldl %cl, %esi, %edi +; X86-NEXT:    andl 36(%edx), %ebx +; X86-NEXT:    orl %edi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 28(%eax), %edi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    andl 32(%edx), %ebx +; X86-NEXT:    orl %esi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 24(%eax), %esi +; X86-NEXT:    shldl %cl, %esi, %edi +; X86-NEXT:    andl 28(%edx), %ebx +; X86-NEXT:    orl %edi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 20(%eax), %edi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    andl 24(%edx), %ebx +; X86-NEXT:    orl %esi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 16(%eax), %esi +; X86-NEXT:    shldl %cl, %esi, %edi +; X86-NEXT:    andl 20(%edx), %ebx +; X86-NEXT:    orl %edi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 12(%eax), %edi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    andl 16(%edx), %ebx +; X86-NEXT:    orl %esi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 8(%eax), %esi +; X86-NEXT:    shldl %cl, %esi, %edi +; X86-NEXT:    andl 12(%edx), %ebx +; X86-NEXT:    orl %edi, %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 4(%eax), %edi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    andl 8(%edx), %ebx +; X86-NEXT:    orl %esi, %ebx  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%ebx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %eax, %esi -; X86-NEXT:    orl %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%ebx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X86-NEXT:    andl %eax, %edx -; X86-NEXT:    movl %edx, %esi +; X86-NEXT:    notl %esi +; X86-NEXT:    movl (%eax), %eax +; X86-NEXT:    shldl %cl, %eax, %edi +; X86-NEXT:    andl 4(%edx), %esi +; X86-NEXT:    orl %edi, %esi +; X86-NEXT:    movl %esi, %edi +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT:    notl %esi +; X86-NEXT:    shll %cl, %eax +; X86-NEXT:    andl (%edx), %esi +; X86-NEXT:    orl %eax, %esi  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%ebx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %eax -; X86-NEXT:    orl %esi, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 56(%ebx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %edx -; X86-NEXT:    movl %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%ebx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %edx -; X86-NEXT:    orl %edi, %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl (%edx,%eax), %eax +; X86-NEXT:    btl %ecx, %eax  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 52(%ebx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %eax -; X86-NEXT:    movl %eax, %edx +; X86-NEXT:    movl %eax, 60(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%ebx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %eax -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl %eax, 56(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%ebx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%ebx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%ebx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%ebx), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %eax, %edx -; X86-NEXT:    orl %esi, %edx -; X86-NEXT:    orl %edi, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT:    movl %eax, 52(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shll %cl, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%ebx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %ecx -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%ebx), %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edi, %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl %eax, 48(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%ebx), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ecx, %eax -; X86-NEXT:    movl %eax, %ecx -; X86-NEXT:    movl %edx, %eax -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%ebx), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edx, %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl 56(%edi), %ebx -; X86-NEXT:    movl 60(%edi), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    shldl %cl, %ebx, %eax -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 52(%edi), %eax -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 48(%edi), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %esi, %eax -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl %eax, 44(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    notl %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl %eax, %edx -; X86-NEXT:    movl 40(%edi), %ebx -; X86-NEXT:    movl 44(%edi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %ebx, %eax -; 
X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 36(%edi), %eax -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 32(%edi), %ebx -; X86-NEXT:    shldl %cl, %ebx, %eax -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 28(%edi), %eax -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 24(%edi), %ebx -; X86-NEXT:    shldl %cl, %ebx, %eax -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 20(%edi), %eax -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 16(%edi), %ebx -; X86-NEXT:    shldl %cl, %ebx, %eax -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl 12(%edi), %eax -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    notl %esi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    movl 8(%edi), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl %eax, 40(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    notl %eax -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl 4(%edi), %ebx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    shldl %cl, %ebx, %edx -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    
andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx +; X86-NEXT:    movl %eax, 36(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    shldl %cl, %esi, %eax -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl %edx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    movl (%edi), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %ebx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    notl %edi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shll %cl, %eax -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    movl %edi, %ecx +; X86-NEXT:    movl %eax, 32(%edx)  ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 60(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 56(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 52(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 44(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 40(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 36(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 32(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 28(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 24(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 20(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 16(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 12(%eax) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl %edi, 8(%eax) -; X86-NEXT:    movl %edx, 4(%eax) -; X86-NEXT:    movl %ecx, (%eax) -; X86-NEXT:    movl %esi, 48(%eax) -; X86-NEXT:    sete %al +; X86-NEXT:    movl %eax, 28(%edx) +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    movl %eax, 24(%edx) +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    movl %eax, 20(%edx) +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    movl %eax, 16(%edx) +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    movl %eax, 12(%edx) +; X86-NEXT:    movl %ebx, 8(%edx) +; X86-NEXT:    movl %edi, 4(%edx) +; X86-NEXT:    movl %esi, (%edx) +; X86-NEXT:    setae %al  ; X86-NEXT:    leal -12(%ebp), %esp  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi @@ -3816,7 +1293,8 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; SSE-NEXT:    pushq %r13  ; SSE-NEXT:    
pushq %r12  ; SSE-NEXT:    pushq %rbx -; SSE-NEXT:    subq $216, %rsp +; SSE-NEXT:    subq $168, %rsp +; SSE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill  ; SSE-NEXT:    xorps %xmm0, %xmm0  ; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp)  ; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) @@ -3829,139 +1307,107 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; SSE-NEXT:    movq $1, {{[0-9]+}}(%rsp)  ; SSE-NEXT:    movl %esi, %ecx  ; SSE-NEXT:    andl $63, %ecx -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $56, %esi -; SSE-NEXT:    negl %esi -; SSE-NEXT:    movslq %esi, %r10 -; SSE-NEXT:    movq 184(%rsp,%r10), %r11 -; SSE-NEXT:    movq 192(%rsp,%r10), %rsi -; SSE-NEXT:    movq %rsi, %r13 -; SSE-NEXT:    shldq %cl, %r11, %r13 -; SSE-NEXT:    movq 200(%rsp,%r10), %r15 -; SSE-NEXT:    shldq %cl, %rsi, %r15 -; SSE-NEXT:    movq 168(%rsp,%r10), %rbx -; SSE-NEXT:    movq 176(%rsp,%r10), %rsi -; SSE-NEXT:    movq %rsi, %r14 -; SSE-NEXT:    shldq %cl, %rbx, %r14 -; SSE-NEXT:    shldq %cl, %rsi, %r11 -; SSE-NEXT:    movq 152(%rsp,%r10), %rax -; SSE-NEXT:    movq 160(%rsp,%r10), %r8 -; SSE-NEXT:    movq %r8, %r12 -; SSE-NEXT:    shldq %cl, %rax, %r12 -; SSE-NEXT:    shldq %cl, %r8, %rbx -; SSE-NEXT:    movq 144(%rsp,%r10), %r9 -; SSE-NEXT:    movq %r9, %r8 -; SSE-NEXT:    shlq %cl, %r8 -; SSE-NEXT:    shldq %cl, %r9, %rax -; SSE-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movl %edx, %edx -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT:    movl %esi, %eax +; SSE-NEXT:    shrl $3, %eax +; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT:    # kill: def $eax killed $eax killed $rax +; SSE-NEXT:    andl $56, %eax +; SSE-NEXT:    negl %eax +; SSE-NEXT:    movslq %eax, %r12 +; SSE-NEXT:    movq 136(%rsp,%r12), %r9 +; SSE-NEXT:    movq 144(%rsp,%r12), %rax +; SSE-NEXT:    movq %rax, %rsi +; SSE-NEXT:    shldq %cl, %r9, %rsi +; SSE-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT:    movq 152(%rsp,%r12), %r11 +; SSE-NEXT:    shldq %cl, %rax, %r11 +; SSE-NEXT:    movq 120(%rsp,%r12), %r10 +; SSE-NEXT:    movq 128(%rsp,%r12), %rax +; SSE-NEXT:    movq %rax, %rbx +; SSE-NEXT:    shldq %cl, %r10, %rbx +; SSE-NEXT:    shldq %cl, %rax, %r9 +; SSE-NEXT:    movq 104(%rsp,%r12), %r14 +; SSE-NEXT:    movq 112(%rsp,%r12), %rax +; SSE-NEXT:    movq %rax, %r15 +; SSE-NEXT:    shldq %cl, %r14, %r15 +; SSE-NEXT:    shldq %cl, %rax, %r10 +; SSE-NEXT:    movq 96(%rsp,%r12), %rax +; SSE-NEXT:    movq %rax, %r13 +; SSE-NEXT:    shlq %cl, %r13 +; SSE-NEXT:    shldq %cl, %rax, %r14 +; SSE-NEXT:    movl %edx, %eax  ; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, (%rsp) +; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)  ; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)  ; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)  ; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) +; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)  ; SSE-NEXT:    movq $0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movq 16(%rdi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 48(%rdi), %rsi -; SSE-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rsi, %r13 -; SSE-NEXT:    andq %rdx, %r12 -; 
SSE-NEXT:    orq %r13, %r12 -; SSE-NEXT:    movq %r15, %rsi -; SSE-NEXT:    movq 56(%rdi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rdx, %r15 -; SSE-NEXT:    movq %rbx, %r13 -; SSE-NEXT:    movq 24(%rdi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rdx, %rbx -; SSE-NEXT:    orq %r15, %rbx -; SSE-NEXT:    movq %r14, %rbp -; SSE-NEXT:    movq 32(%rdi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rdx, %r14 -; SSE-NEXT:    movq %r8, %r15 -; SSE-NEXT:    movq (%rdi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rdx, %r8 -; SSE-NEXT:    orq %r14, %r8 -; SSE-NEXT:    orq %r12, %r8 -; SSE-NEXT:    movq %r11, %r12 -; SSE-NEXT:    movq 40(%rdi), %r9 -; SSE-NEXT:    andq %r9, %r11 -; SSE-NEXT:    movq %rax, %r14 -; SSE-NEXT:    movq 8(%rdi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq %rdx, %rax -; SSE-NEXT:    orq %r11, %rax -; SSE-NEXT:    orq %rbx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT:    movq 8(%rsp,%r12), %r8 +; SSE-NEXT:    movq 16(%rsp,%r12), %rsi +; SSE-NEXT:    movq %rsi, %rbp +; SSE-NEXT:    shldq %cl, %r8, %rbp  ; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload  ; SSE-NEXT:    notq %rax -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; SSE-NEXT:    movq %rax, %rdx -; SSE-NEXT:    movq 56(%rsp,%r10), %r11 -; SSE-NEXT:    movq 64(%rsp,%r10), %rax -; SSE-NEXT:    movq %rax, %rbx -; SSE-NEXT:    shldq %cl, %r11, %rbx -; SSE-NEXT:    orq %rbx, %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    notq %rsi -; SSE-NEXT:    movq 72(%rsp,%r10), %rbx -; SSE-NEXT:    shldq %cl, %rax, %rbx -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; SSE-NEXT:    orq %rbx, %rsi -; SSE-NEXT:    notq %rbp -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; SSE-NEXT:    movq 40(%rsp,%r10), %rax -; SSE-NEXT:    movq 48(%rsp,%r10), %rdx -; SSE-NEXT:    movq %rdx, %rbx -; SSE-NEXT:    shldq %cl, %rax, %rbx -; SSE-NEXT:    orq %rbx, %rbp -; SSE-NEXT:    notq %r12 -; SSE-NEXT:    andq %r9, %r12 -; SSE-NEXT:    shldq %cl, %rdx, %r11 -; SSE-NEXT:    movq 24(%rsp,%r10), %r9 -; SSE-NEXT:    movq 32(%rsp,%r10), %rdx -; SSE-NEXT:    movq %rdx, %rbx -; SSE-NEXT:    shldq %cl, %r9, %rbx -; SSE-NEXT:    orq %r11, %r12 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; SSE-NEXT:    andq 48(%rdi), %rax +; SSE-NEXT:    orq %rbp, %rax +; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT:    notq %rbx  ; SSE-NEXT:    notq %r11 -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    orq %rbx, %r11 -; SSE-NEXT:    notq %r13 -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; SSE-NEXT:    orq %rax, %r13 +; SSE-NEXT:    movq 24(%rsp,%r12), %rax +; SSE-NEXT:    shldq %cl, %rsi, %rax +; SSE-NEXT:    movq -8(%rsp,%r12), %rbp +; SSE-NEXT:    movq (%rsp,%r12), %rdx +; SSE-NEXT:    movq %rdx, %rsi +; SSE-NEXT:    shldq %cl, %rbp, %rsi +; SSE-NEXT:    andq 56(%rdi), %r11 +; SSE-NEXT:    andq 32(%rdi), %rbx +; SSE-NEXT:    orq %rax, %r11 +; SSE-NEXT:    orq %rsi, %rbx  ; SSE-NEXT:    notq %r15 -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; SSE-NEXT:  
  movq 16(%rsp,%r10), %rax -; SSE-NEXT:    movq %rax, %rdx -; SSE-NEXT:    shlq %cl, %rdx -; SSE-NEXT:    orq %rdx, %r15 +; SSE-NEXT:    shldq %cl, %rdx, %r8 +; SSE-NEXT:    notq %r9 +; SSE-NEXT:    andq 40(%rdi), %r9 +; SSE-NEXT:    orq %r8, %r9 +; SSE-NEXT:    movq -24(%rsp,%r12), %rax +; SSE-NEXT:    movq -16(%rsp,%r12), %rdx +; SSE-NEXT:    movq %rdx, %rsi +; SSE-NEXT:    shldq %cl, %rax, %rsi +; SSE-NEXT:    andq 16(%rdi), %r15 +; SSE-NEXT:    orq %rsi, %r15 +; SSE-NEXT:    shldq %cl, %rdx, %rbp +; SSE-NEXT:    notq %r10 +; SSE-NEXT:    notq %r13 +; SSE-NEXT:    movq -32(%rsp,%r12), %rdx +; SSE-NEXT:    movq %rdx, %rsi +; SSE-NEXT:    shlq %cl, %rsi +; SSE-NEXT:    andq 24(%rdi), %r10 +; SSE-NEXT:    andq (%rdi), %r13 +; SSE-NEXT:    orq %rbp, %r10 +; SSE-NEXT:    orq %rsi, %r13  ; SSE-NEXT:    notq %r14 -; SSE-NEXT:    andq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload  ; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shldq %cl, %rax, %r9 -; SSE-NEXT:    orq %r9, %r14 -; SSE-NEXT:    orq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; SSE-NEXT:    shldq %cl, %rdx, %rax +; SSE-NEXT:    andq 8(%rdi), %r14 +; SSE-NEXT:    orq %rax, %r14 +; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; SSE-NEXT:    andl $60, %eax +; SSE-NEXT:    movl (%rdi,%rax), %eax +; SSE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SSE-NEXT:    btl %ecx, %eax  ; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload  ; SSE-NEXT:    movq %rax, 48(%rdi) -; SSE-NEXT:    movq %rsi, 56(%rdi) -; SSE-NEXT:    movq %rbp, 32(%rdi) -; SSE-NEXT:    movq %r12, 40(%rdi) -; SSE-NEXT:    movq %r11, 16(%rdi) -; SSE-NEXT:    movq %r13, 24(%rdi) -; SSE-NEXT:    movq %r15, (%rdi) +; SSE-NEXT:    movq %r11, 56(%rdi) +; SSE-NEXT:    movq %rbx, 32(%rdi) +; SSE-NEXT:    movq %r9, 40(%rdi) +; SSE-NEXT:    movq %r15, 16(%rdi) +; SSE-NEXT:    movq %r10, 24(%rdi) +; SSE-NEXT:    movq %r13, (%rdi)  ; SSE-NEXT:    movq %r14, 8(%rdi) -; SSE-NEXT:    sete %al -; SSE-NEXT:    addq $216, %rsp +; SSE-NEXT:    setae %al +; SSE-NEXT:    addq $168, %rsp  ; SSE-NEXT:    popq %rbx  ; SSE-NEXT:    popq %r12  ; SSE-NEXT:    popq %r13 @@ -3978,132 +1424,105 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; AVX2-NEXT:    pushq %r13  ; AVX2-NEXT:    pushq %r12  ; AVX2-NEXT:    pushq %rbx -; AVX2-NEXT:    subq $200, %rsp +; AVX2-NEXT:    subq $184, %rsp  ; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0  ; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)  ; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)  ; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)  ; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = [1,0,0,0]  ; AVX2-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    movl %esi, %r8d -; AVX2-NEXT:    andl $63, %r8d -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    andl $56, %esi -; AVX2-NEXT:    negl %esi -; AVX2-NEXT:    movslq %esi, %rsi -; AVX2-NEXT:    movq 144(%rsp,%rsi), %r11 -; AVX2-NEXT:    movq 152(%rsp,%rsi), %r12 -; AVX2-NEXT:    movq %r12, %r10 -; AVX2-NEXT:    movl %r8d, %ecx -; AVX2-NEXT:    shldq %cl, %r11, %r10 -; AVX2-NEXT:    movq 176(%rsp,%rsi), %r14 -; AVX2-NEXT:    movq 184(%rsp,%rsi), %r9 +; AVX2-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT:    movl %esi, %ecx +; AVX2-NEXT:    andl $63, %ecx +; AVX2-NEXT:    movl %esi, %ebx +; AVX2-NEXT:    shrl $3, %ebx +; AVX2-NEXT:    movl %ebx, %eax +; AVX2-NEXT:    andl $56, %eax +; AVX2-NEXT:    negl %eax +; AVX2-NEXT:    movslq %eax, %r11 +; AVX2-NEXT:    movq 
128(%rsp,%r11), %r15 +; AVX2-NEXT:    movq 136(%rsp,%r11), %rax +; AVX2-NEXT:    movq %rax, %rsi +; AVX2-NEXT:    shldq %cl, %r15, %rsi +; AVX2-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT:    movq 120(%rsp,%r11), %r8 +; AVX2-NEXT:    shldq %cl, %r8, %r15 +; AVX2-NEXT:    movq 144(%rsp,%r11), %r14 +; AVX2-NEXT:    movq 152(%rsp,%r11), %rsi +; AVX2-NEXT:    movq %rsi, %r9  ; AVX2-NEXT:    shldq %cl, %r14, %r9 -; AVX2-NEXT:    movq 160(%rsp,%rsi), %r15 -; AVX2-NEXT:    movq 168(%rsp,%rsi), %r13 -; AVX2-NEXT:    movq %r13, %rbx -; AVX2-NEXT:    shldq %cl, %r15, %rbx -; AVX2-NEXT:    movq 128(%rsp,%rsi), %rbp -; AVX2-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 136(%rsp,%rsi), %rax -; AVX2-NEXT:    shldq %cl, %rax, %r11 -; AVX2-NEXT:    shldq %cl, %r13, %r14 -; AVX2-NEXT:    shldq %cl, %r12, %r15 -; AVX2-NEXT:    shldq %cl, %rbp, %rax +; AVX2-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT:    shldq %cl, %rax, %r14 +; AVX2-NEXT:    movq 112(%rsp,%r11), %rax  ; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movl %edx, %edx +; AVX2-NEXT:    movq 160(%rsp,%r11), %r13 +; AVX2-NEXT:    movq 168(%rsp,%r11), %r12 +; AVX2-NEXT:    shldq %cl, %r13, %r12 +; AVX2-NEXT:    shldq %cl, %rsi, %r13 +; AVX2-NEXT:    shldq %cl, %rax, %r8 +; AVX2-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT:    movl %edx, %eax  ; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1  ; AVX2-NEXT:    vmovups %xmm1, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)  ; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)  ; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    movq %rdx, (%rsp) +; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)  ; AVX2-NEXT:    movq $0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    movq 16(%rdi), %r12 -; AVX2-NEXT:    movq 48(%rdi), %rbp -; AVX2-NEXT:    movq 32(%rdi), %r13 -; AVX2-NEXT:    andnq %r13, %r15, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r15, %r13 -; AVX2-NEXT:    andnq %rbp, %r14, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r14, %rbp -; AVX2-NEXT:    andnq %r12, %r11, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r11, %r12 -; AVX2-NEXT:    movq 40(%rdi), %rax -; AVX2-NEXT:    orq %rbp, %r12 -; AVX2-NEXT:    andnq %rax, %rbx, %rcx -; AVX2-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq %rax, %rbp -; AVX2-NEXT:    andq %rbx, %rbp -; AVX2-NEXT:    movq 56(%rdi), %rcx -; AVX2-NEXT:    andnq %rcx, %r9, %rbx -; AVX2-NEXT:    andq %r9, %rcx -; AVX2-NEXT:    movq 24(%rdi), %rax -; AVX2-NEXT:    andnq %rax, %r10, %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq %r10, %rax -; AVX2-NEXT:    orq %rcx, %rax -; AVX2-NEXT:    shlxq %r8, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; AVX2-NEXT:    movq (%rdi), %r10 -; AVX2-NEXT:    andnq %r10, %rcx, %r15 -; AVX2-NEXT:    andq %rcx, %r10 -; AVX2-NEXT:    movq 40(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq 48(%rsp,%rsi), %r11 -; AVX2-NEXT:    movq %r11, %r9 -; AVX2-NEXT:    movl %r8d, %ecx -; AVX2-NEXT:    shldq %cl, %rdx, %r9 -; AVX2-NEXT:    orq %r13, %r10 -; AVX2-NEXT:    orq %r12, %r10 -; AVX2-NEXT:    movq 8(%rdi), %r13 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX2-NEXT:    andnq %r13, %rcx, 
%r12 -; AVX2-NEXT:    andq %rcx, %r13 -; AVX2-NEXT:    orq %rbp, %r13 +; AVX2-NEXT:    movq 24(%rsp,%r11), %rbp +; AVX2-NEXT:    movq 32(%rsp,%r11), %rdx +; AVX2-NEXT:    movq %rdx, %rax +; AVX2-NEXT:    shldq %cl, %rbp, %rax +; AVX2-NEXT:    movq 40(%rsp,%r11), %r10 +; AVX2-NEXT:    shldq %cl, %rdx, %r10 +; AVX2-NEXT:    movq 8(%rsp,%r11), %r9 +; AVX2-NEXT:    movq 16(%rsp,%r11), %rdx +; AVX2-NEXT:    movq %rdx, %r8 +; AVX2-NEXT:    shldq %cl, %r9, %r8 +; AVX2-NEXT:    shldq %cl, %rdx, %rbp +; AVX2-NEXT:    andnq 48(%rdi), %r13, %r13  ; AVX2-NEXT:    orq %rax, %r13 -; AVX2-NEXT:    movq 56(%rsp,%rsi), %rax -; AVX2-NEXT:    movl %r8d, %ecx -; AVX2-NEXT:    shldq %cl, %r11, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    orq %r9, %r14 -; AVX2-NEXT:    orq %rax, %rbx -; AVX2-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 24(%rsp,%rsi), %rax -; AVX2-NEXT:    movq 32(%rsp,%rsi), %r9 -; AVX2-NEXT:    movq %r9, %r11 -; AVX2-NEXT:    shldq %cl, %rax, %r11 -; AVX2-NEXT:    shldq %cl, %r9, %rdx -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; AVX2-NEXT:    orq %r11, %rbp -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX2-NEXT:    orq %rdx, %rbx -; AVX2-NEXT:    movq 8(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq 16(%rsp,%rsi), %r9 -; AVX2-NEXT:    movq %r9, %r11 -; AVX2-NEXT:    shldq %cl, %rdx, %r11 -; AVX2-NEXT:    shldq %cl, %r9, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX2-NEXT:    orq %r11, %r9 -; AVX2-NEXT:    movq (%rsp,%rsi), %rsi -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX2-NEXT:    orq %rax, %r11 -; AVX2-NEXT:    shlxq %r8, %rsi, %rax -; AVX2-NEXT:    shldq %cl, %rsi, %rdx -; AVX2-NEXT:    orq %rax, %r15 -; AVX2-NEXT:    orq %rdx, %r12 -; AVX2-NEXT:    orq %r10, %r13 -; AVX2-NEXT:    movq %r14, 48(%rdi) -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    movq %rax, 56(%rdi) -; AVX2-NEXT:    movq %rbp, 32(%rdi) -; AVX2-NEXT:    movq %rbx, 40(%rdi) -; AVX2-NEXT:    movq %r9, 16(%rdi) -; AVX2-NEXT:    movq %r11, 24(%rdi) -; AVX2-NEXT:    movq %r15, (%rdi) -; AVX2-NEXT:    movq %r12, 8(%rdi) -; AVX2-NEXT:    sete %al -; AVX2-NEXT:    addq $200, %rsp +; AVX2-NEXT:    movq -8(%rsp,%r11), %rax +; AVX2-NEXT:    movq (%rsp,%r11), %rdx +; AVX2-NEXT:    movq %rdx, %rsi +; AVX2-NEXT:    shldq %cl, %rax, %rsi +; AVX2-NEXT:    shldq %cl, %rdx, %r9 +; AVX2-NEXT:    andnq 56(%rdi), %r12, %r12 +; AVX2-NEXT:    andnq 32(%rdi), %r14, %r14 +; AVX2-NEXT:    orq %r10, %r12 +; AVX2-NEXT:    orq %r8, %r14 +; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; AVX2-NEXT:    andnq 40(%rdi), %rdx, %rdx +; AVX2-NEXT:    orq %rbp, %rdx +; AVX2-NEXT:    shlxq %rcx, {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; AVX2-NEXT:    movq -16(%rsp,%r11), %r10 +; AVX2-NEXT:    shlxq %rcx, %r10, %r11 +; AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx +; AVX2-NEXT:    shldq %cl, %r10, %rax +; AVX2-NEXT:    andnq 16(%rdi), %r15, %rcx +; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX2-NEXT:    andnq 24(%rdi), %r10, %r10 +; AVX2-NEXT:    orq %rsi, %rcx +; AVX2-NEXT:    orq %r9, %r10 +; AVX2-NEXT:    andnq (%rdi), %r8, %rsi +; AVX2-NEXT:    orq %r11, %rsi +; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX2-NEXT:    andnq 8(%rdi), %r8, %r8 +; AVX2-NEXT:    orq %rax, %r8 +; AVX2-NEXT:    andl $60, %ebx +; AVX2-NEXT:    movl (%rdi,%rbx), 
%eax +; AVX2-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload +; AVX2-NEXT:    btl %r9d, %eax +; AVX2-NEXT:    movq %r13, 48(%rdi) +; AVX2-NEXT:    movq %r12, 56(%rdi) +; AVX2-NEXT:    movq %r14, 32(%rdi) +; AVX2-NEXT:    movq %rdx, 40(%rdi) +; AVX2-NEXT:    movq %rcx, 16(%rdi) +; AVX2-NEXT:    movq %r10, 24(%rdi) +; AVX2-NEXT:    movq %rsi, (%rdi) +; AVX2-NEXT:    movq %r8, 8(%rdi) +; AVX2-NEXT:    setae %al +; AVX2-NEXT:    addq $184, %rsp  ; AVX2-NEXT:    popq %rbx  ; AVX2-NEXT:    popq %r12  ; AVX2-NEXT:    popq %r13 @@ -4121,39 +1540,41 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; AVX512-NEXT:    pushq %r13  ; AVX512-NEXT:    pushq %r12  ; AVX512-NEXT:    pushq %rbx -; AVX512-NEXT:    subq $184, %rsp +; AVX512-NEXT:    subq $168, %rsp  ; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0  ; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)  ; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)  ; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)  ; AVX512-NEXT:    vmovaps {{.*#+}} xmm1 = [1,0,0,0]  ; AVX512-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp) +; AVX512-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill  ; AVX512-NEXT:    movl %esi, %ecx  ; AVX512-NEXT:    andl $63, %ecx -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    andl $56, %esi -; AVX512-NEXT:    negl %esi -; AVX512-NEXT:    movslq %esi, %rsi -; AVX512-NEXT:    movq 128(%rsp,%rsi), %r10 -; AVX512-NEXT:    movq 136(%rsp,%rsi), %r12 -; AVX512-NEXT:    movq %r12, %rax -; AVX512-NEXT:    shldq %cl, %r10, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 160(%rsp,%rsi), %r14 -; AVX512-NEXT:    movq 168(%rsp,%rsi), %rax -; AVX512-NEXT:    shldq %cl, %r14, %rax +; AVX512-NEXT:    movl %esi, %r10d +; AVX512-NEXT:    shrl $3, %r10d +; AVX512-NEXT:    movl %r10d, %r8d +; AVX512-NEXT:    andl $56, %r8d +; AVX512-NEXT:    negl %r8d +; AVX512-NEXT:    movslq %r8d, %r9 +; AVX512-NEXT:    movq 112(%rsp,%r9), %r11 +; AVX512-NEXT:    movq 120(%rsp,%r9), %r14 +; AVX512-NEXT:    movq %r14, %rax +; AVX512-NEXT:    shldq %cl, %r11, %rax  ; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 144(%rsp,%rsi), %r15 -; AVX512-NEXT:    movq 152(%rsp,%rsi), %r11 -; AVX512-NEXT:    movq %r11, %rbx +; AVX512-NEXT:    movq 104(%rsp,%r9), %rax +; AVX512-NEXT:    shldq %cl, %rax, %r11 +; AVX512-NEXT:    movq 128(%rsp,%r9), %r15 +; AVX512-NEXT:    movq 136(%rsp,%r9), %rbp +; AVX512-NEXT:    movq %rbp, %rbx  ; AVX512-NEXT:    shldq %cl, %r15, %rbx -; AVX512-NEXT:    movq 120(%rsp,%rsi), %rax +; AVX512-NEXT:    shldq %cl, %r14, %r15 +; AVX512-NEXT:    movq 144(%rsp,%r9), %r13 +; AVX512-NEXT:    movq 152(%rsp,%r9), %r12 +; AVX512-NEXT:    shldq %cl, %r13, %r12 +; AVX512-NEXT:    movq 96(%rsp,%r9), %r14 +; AVX512-NEXT:    shldq %cl, %rbp, %r13 +; AVX512-NEXT:    shldq %cl, %r14, %rax  ; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rax, %r10 -; AVX512-NEXT:    shldq %cl, %r11, %r14 -; AVX512-NEXT:    movq %rdi, %r9 -; AVX512-NEXT:    movq 112(%rsp,%rsi), %r11 -; AVX512-NEXT:    shldq %cl, %r12, %r15  ; AVX512-NEXT:    movl %edx, %edx  ; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1  ; AVX512-NEXT:    vmovups %xmm1, {{[0-9]+}}(%rsp) @@ -4162,90 +1583,59 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  ; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)  ; AVX512-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)  ; AVX512-NEXT:    movq $0, {{[0-9]+}}(%rsp) -; 
AVX512-NEXT:    movq 16(%rdi), %r12 -; AVX512-NEXT:    movq 48(%rdi), %r13 -; AVX512-NEXT:    movq 32(%rdi), %rbp -; AVX512-NEXT:    andnq %rbp, %r15, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r15, %rbp -; AVX512-NEXT:    andnq %r13, %r14, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r14, %r13 -; AVX512-NEXT:    andnq %r12, %r10, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq %r10, %r12 -; AVX512-NEXT:    movq 40(%rdi), %r8 -; AVX512-NEXT:    orq %r13, %r12 -; AVX512-NEXT:    andnq %r8, %rbx, %rdi -; AVX512-NEXT:    andq %rbx, %r8 -; AVX512-NEXT:    movq 56(%r9), %r13 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; AVX512-NEXT:    andnq %r13, %rdx, %r10 -; AVX512-NEXT:    andq %rdx, %r13 -; AVX512-NEXT:    movq 24(%r9), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; AVX512-NEXT:    andnq %rax, %rdx, %r15 -; AVX512-NEXT:    andq %rdx, %rax -; AVX512-NEXT:    orq %r13, %rax -; AVX512-NEXT:    shlxq %rcx, %r11, %r13 -; AVX512-NEXT:    movq (%r9), %rdx -; AVX512-NEXT:    andnq %rdx, %r13, %r14 -; AVX512-NEXT:    andq %r13, %rdx -; AVX512-NEXT:    orq %rbp, %rdx -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r11, %rbp -; AVX512-NEXT:    orq %r12, %rdx -; AVX512-NEXT:    movq 8(%r9), %r13 -; AVX512-NEXT:    andnq %r13, %rbp, %rbx -; AVX512-NEXT:    andq %rbp, %r13 -; AVX512-NEXT:    orq %r8, %r13 -; AVX512-NEXT:    movq 24(%rsp,%rsi), %r8 -; AVX512-NEXT:    orq %rax, %r13 -; AVX512-NEXT:    movq 32(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, %r12 -; AVX512-NEXT:    shldq %cl, %r8, %r12 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX512-NEXT:    orq %r12, %r11 -; AVX512-NEXT:    movq 40(%rsp,%rsi), %r12 -; AVX512-NEXT:    shldq %cl, %rax, %r12 -; AVX512-NEXT:    orq %r12, %r10 -; AVX512-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 8(%rsp,%rsi), %rax -; AVX512-NEXT:    movq 16(%rsp,%rsi), %r12 -; AVX512-NEXT:    movq %r12, %rbp -; AVX512-NEXT:    shldq %cl, %rax, %rbp -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX512-NEXT:    orq %rbp, %r10 -; AVX512-NEXT:    shldq %cl, %r12, %r8 -; AVX512-NEXT:    orq %r8, %rdi -; AVX512-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq -8(%rsp,%rsi), %r8 -; AVX512-NEXT:    movq (%rsp,%rsi), %r12 -; AVX512-NEXT:    movq %r12, %rbp +; AVX512-NEXT:    movq 8(%rsp,%r9), %r8 +; AVX512-NEXT:    movq 16(%rsp,%r9), %rax +; AVX512-NEXT:    movq %rax, %rbp  ; AVX512-NEXT:    shldq %cl, %r8, %rbp -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; AVX512-NEXT:    orq %rbp, %rdi -; AVX512-NEXT:    movq -16(%rsp,%rsi), %rsi -; AVX512-NEXT:    shldq %cl, %r12, %rax -; AVX512-NEXT:    orq %rax, %r15 -; AVX512-NEXT:    shlxq %rcx, %rsi, %rax -; AVX512-NEXT:    # kill: def $cl killed $cl killed $rcx +; AVX512-NEXT:    andnq 48(%rdi), %r13, %r13 +; AVX512-NEXT:    orq %rbp, %r13 +; AVX512-NEXT:    movq 24(%rsp,%r9), %rbp +; AVX512-NEXT:    shldq %cl, %rax, %rbp +; AVX512-NEXT:    movq -8(%rsp,%r9), %rax +; AVX512-NEXT:    movq (%rsp,%r9), %rsi +; AVX512-NEXT:    movq %rsi, %rdx +; AVX512-NEXT:    shldq %cl, %rax, %rdx +; AVX512-NEXT:    andnq 56(%rdi), %r12, %r12 +; AVX512-NEXT:    orq %rbp, %r12 +; AVX512-NEXT:    andnq 32(%rdi), %r15, %r15 +; 
AVX512-NEXT:    orq %rdx, %r15  ; AVX512-NEXT:    shldq %cl, %rsi, %r8 -; AVX512-NEXT:    orq %rax, %r14 +; AVX512-NEXT:    movq -24(%rsp,%r9), %rdx +; AVX512-NEXT:    movq -16(%rsp,%r9), %rsi +; AVX512-NEXT:    movq %rsi, %rbp +; AVX512-NEXT:    shldq %cl, %rdx, %rbp +; AVX512-NEXT:    andnq 40(%rdi), %rbx, %rbx  ; AVX512-NEXT:    orq %r8, %rbx -; AVX512-NEXT:    orq %rdx, %r13 -; AVX512-NEXT:    movq %r11, 48(%r9) -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    movq %rax, 56(%r9) -; AVX512-NEXT:    movq %r10, 32(%r9) +; AVX512-NEXT:    andnq 16(%rdi), %r11, %r8 +; AVX512-NEXT:    orq %rbp, %r8 +; AVX512-NEXT:    shlxq %rcx, %r14, %r11 +; AVX512-NEXT:    movq -32(%rsp,%r9), %r9 +; AVX512-NEXT:    shldq %cl, %rsi, %rax +; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; AVX512-NEXT:    andnq 24(%rdi), %rsi, %rsi +; AVX512-NEXT:    orq %rax, %rsi +; AVX512-NEXT:    shlxq %rcx, %r9, %rax +; AVX512-NEXT:    # kill: def $cl killed $cl killed $rcx +; AVX512-NEXT:    shldq %cl, %r9, %rdx +; AVX512-NEXT:    andnq (%rdi), %r11, %rcx +; AVX512-NEXT:    orq %rax, %rcx  ; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    movq %rax, 40(%r9) -; AVX512-NEXT:    movq %rdi, 16(%r9) -; AVX512-NEXT:    movq %r15, 24(%r9) -; AVX512-NEXT:    movq %r14, (%r9) -; AVX512-NEXT:    movq %rbx, 8(%r9) -; AVX512-NEXT:    sete %al -; AVX512-NEXT:    addq $184, %rsp +; AVX512-NEXT:    andnq 8(%rdi), %rax, %rax +; AVX512-NEXT:    orq %rdx, %rax +; AVX512-NEXT:    andl $60, %r10d +; AVX512-NEXT:    movl (%rdi,%r10), %edx +; AVX512-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload +; AVX512-NEXT:    btl %r9d, %edx +; AVX512-NEXT:    movq %r13, 48(%rdi) +; AVX512-NEXT:    movq %r12, 56(%rdi) +; AVX512-NEXT:    movq %r15, 32(%rdi) +; AVX512-NEXT:    movq %rbx, 40(%rdi) +; AVX512-NEXT:    movq %r8, 16(%rdi) +; AVX512-NEXT:    movq %rsi, 24(%rdi) +; AVX512-NEXT:    movq %rcx, (%rdi) +; AVX512-NEXT:    movq %rax, 8(%rdi) +; AVX512-NEXT:    setae %al +; AVX512-NEXT:    addq $168, %rsp  ; AVX512-NEXT:    popq %rbx  ; AVX512-NEXT:    popq %r12  ; AVX512-NEXT:    popq %r13 @@ -4274,2749 +1664,25 @@ define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {  define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {  ; X86-LABEL: test_ne_i4096:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $1792, %esp # imm = 0x700 -; X86-NEXT:    movl 12(%ebp), %ebx -; X86-NEXT:    movl %ebx, %ecx -; X86-NEXT:    shrl $3, %ecx -; X86-NEXT:    andl $508, %ecx # imm = 0x1FC -; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    subl %ecx, %esi -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    
movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) 
-; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, 
{{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 248(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 252(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl $31, %ebx -; X86-NEXT:    movl %ebx, %ecx -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 504(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 508(%esi), %edx -; X86-NEXT:    shldl %cl, %eax, %edx 
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 120(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 124(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 376(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 380(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 184(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 188(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 440(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 444(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 56(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 312(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 316(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 216(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 220(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 472(%esi), %edi -; X86-NEXT:    movl 476(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 88(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 92(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 344(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 348(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 152(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 156(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 408(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X86-NEXT:    movl 412(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 280(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 284(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 232(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 236(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 488(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 492(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 104(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 108(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 360(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 364(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 168(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 172(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 424(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 428(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 296(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 300(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 200(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 204(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:   
 shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 456(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 460(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 72(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 76(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 328(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 332(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 136(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 140(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 392(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 396(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 264(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 268(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 240(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 244(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 496(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 500(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 112(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 116(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 368(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 372(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 
176(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 180(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 432(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 436(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 52(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 304(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 308(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 208(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 212(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 464(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 468(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 80(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 84(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 336(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 340(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 144(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 148(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 400(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 404(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 272(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 276(%esi), 
%eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 224(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 228(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 480(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 484(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 96(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 100(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 352(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 356(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 160(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 164(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 416(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 420(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 288(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 292(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 192(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 196(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 448(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 452(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 64(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 68(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; 
X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 320(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 324(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 128(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 132(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %edi, %edx -; X86-NEXT:    movl 256(%esi), %edi -; X86-NEXT:    movl 260(%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 388(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl 4(%esi), %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shrdl $1, %eax, %edi -; X86-NEXT:    shrl %eax -; X86-NEXT:    movl %ebx, %edx -; X86-NEXT:    movl %eax, %ebx -; X86-NEXT:    notb %cl -; X86-NEXT:    shrdl %cl, %eax, %edi -; X86-NEXT:    shrl %cl, %ebx -; X86-NEXT:    movb $32, %cl -; X86-NEXT: 
   testb %cl, %cl -; X86-NEXT:    movl (%esi), %eax -; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    jne .LBB20_2 -; X86-NEXT:  # %bb.1: -; X86-NEXT:    movl %edi, %ebx -; X86-NEXT:  .LBB20_2: -; X86-NEXT:    movl %edx, %ecx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shll %cl, %edx -; X86-NEXT:    orl %ebx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 320(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 64(%eax), %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 448(%eax), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 192(%eax), %ecx -; X86-NEXT:    orl %edx, %ecx -; X86-NEXT:    orl %esi, %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 288(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 32(%eax), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 416(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 160(%eax), %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 352(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 96(%eax), %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 480(%eax), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 224(%eax), %ecx -; X86-NEXT:    orl %edx, %ecx -; X86-NEXT:    orl %esi, %ecx -; X86-NEXT:    orl %edi, %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 272(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 16(%eax), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 400(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 144(%eax), %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    orl %edx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 336(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 80(%eax), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 464(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 208(%eax), %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    orl %esi, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 304(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: 
   andl 48(%eax), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 432(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 176(%eax), %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    orl %edx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 368(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 112(%eax), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 496(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    andl 240(%eax), %ebx -; X86-NEXT:    orl %ecx, %ebx -; X86-NEXT:    orl %edx, %ebx -; X86-NEXT:    orl %esi, %ebx -; X86-NEXT:    orl %edi, %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 264(%eax), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 8(%eax), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl %eax, %ebx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 392(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 136(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    orl %edx, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 328(%ebx), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 72(%ebx), %eax -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 456(%ebx), %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 200(%ebx), %esi -; X86-NEXT:    orl %edi, %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 296(%ebx), %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 40(%ebx), %eax -; X86-NEXT:    orl %edi, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 424(%ebx), %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 168(%ebx), %edx -; X86-NEXT:    orl %edi, %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 360(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 104(%ebx), %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 488(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 232(%ebx), %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    orl %esi, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 280(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 24(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 408(%ebx), %eax 
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 152(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 344(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 88(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 472(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 216(%ebx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    orl %edx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 312(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 56(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 440(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 184(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 376(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 120(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 504(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 248(%ebx), %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    orl %esi, %edi -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 324(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 68(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 452(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 196(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 292(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 36(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 420(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 164(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 356(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 100(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 484(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 228(%ebx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT: 
   orl %edx, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 276(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 20(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 404(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 148(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 340(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 84(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 468(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 212(%ebx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    orl %edx, %esi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 308(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 52(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 436(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 180(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 372(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 116(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 500(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 244(%ebx), %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    orl %esi, %edi -; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 268(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 12(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 396(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 140(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 332(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 76(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 460(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 204(%ebx), %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 300(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 44(%ebx), 
%ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 428(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 172(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 364(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 108(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 492(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    andl 236(%ebx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    orl %edx, %esi -; X86-NEXT:    orl %edi, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 284(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 28(%ebx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 412(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 156(%ebx), %edi -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 348(%ebx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 92(%ebx), %edx -; X86-NEXT:    orl %eax, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 476(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 220(%ebx), %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    orl %edx, %eax -; X86-NEXT:    orl %edi, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 316(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 60(%ebx), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 444(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT:    andl 188(%ebx), %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    orl %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 380(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    andl 124(%ebx), %edx -; X86-NEXT:    orl %ecx, %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 508(%ebx), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT:    movl 8(%ebp), %esi -; X86-NEXT:    andl 252(%esi), %ebx -; X86-NEXT:    orl %ecx, %ebx -; X86-NEXT:    orl %edx, %ebx -; X86-NEXT:    orl %edi, %ebx -; X86-NEXT:    orl %eax, %ebx -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    negl %ecx -; X86-NEXT:    movl 1648(%esp,%ecx), %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT:    shldl %cl, %edi, %esi -; X86-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT:    shldl %cl, %edx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl 8(%ebp), %edx -; X86-NEXT:    andl 128(%edx), %ecx -; X86-NEXT:    andl 384(%edx), %edi -; X86-NEXT:    orl %ecx, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shll %cl, %eax -; X86-NEXT:    andl (%edx), %eax -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 256(%edx), %eax -; X86-NEXT:    orl %eax, %edi -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 260(%edx), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    andl 4(%edx), %ecx -; X86-NEXT:    orl %eax, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    andl 132(%edx), %eax -; X86-NEXT:    andl 388(%edx), %esi -; X86-NEXT:    orl %eax, %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT:    orl %ebx, %esi -; X86-NEXT:    orl %edi, %esi -; X86-NEXT:    setne %al -; X86-NEXT:    leal -12(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl %ecx, %edx +; X86-NEXT:    andl $4064, %edx # imm = 0xFE0 +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    movl (%eax,%edx), %eax +; X86-NEXT:    btl %ecx, %eax +; X86-NEXT:    setb %al  ; X86-NEXT:    retl  ; -; SSE-LABEL: test_ne_i4096: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %rbp -; SSE-NEXT:    pushq %r15 -; SSE-NEXT:    pushq %r14 -; SSE-NEXT:    pushq %r13 -; SSE-NEXT:    pushq %r12 -; SSE-NEXT:    pushq %rbx -; SSE-NEXT:    subq $1576, %rsp # imm = 0x628 -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    movl %esi, %eax -; SSE-NEXT:    andl $4032, %eax # imm = 0xFC0 -; SSE-NEXT:    xorps %xmm0, %xmm0 -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, 
{{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movq $1, {{[0-9]+}}(%rsp) -; SSE-NEXT:    andl $63, %ecx -; SSE-NEXT:    shrl $3, %eax -; SSE-NEXT:    negl %eax -; SSE-NEXT:    movslq %eax, %rsi -; SSE-NEXT:    movq 1296(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1304(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1552(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1560(%rsp,%rsi), %rax -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1168(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1176(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1424(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1432(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; 
SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1232(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1240(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1488(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1496(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1104(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1112(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1360(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, (%rsp) # 8-byte Spill -; SSE-NEXT:    movq 1368(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1264(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1272(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1520(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1528(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1136(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1144(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1392(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1400(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1200(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1208(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1456(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1464(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1072(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1080(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax 
-; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1328(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1336(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1280(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1288(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1536(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1544(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1152(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1160(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1408(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1416(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1216(%rsp,%rsi), %r11 -; SSE-NEXT:    movq 1224(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %r11, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1472(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1480(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1088(%rsp,%rsi), %r9 -; SSE-NEXT:    movq 1096(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %r9, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1344(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1352(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1248(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1256(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rax, %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1504(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1512(%rsp,%rsi), %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rdx, %rax -; SSE-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1120(%rsp,%rsi), %rax -; 
SSE-NEXT:    movq 1128(%rsp,%rsi), %r8 -; SSE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    shldq %cl, %rax, %r8 -; SSE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1376(%rsp,%rsi), %r13 -; SSE-NEXT:    movq 1384(%rsp,%rsi), %rbx -; SSE-NEXT:    movq %rbx, %r8 -; SSE-NEXT:    shldq %cl, %r13, %r8 -; SSE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1184(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1192(%rsp,%rsi), %r15 -; SSE-NEXT:    movq %r15, %r14 -; SSE-NEXT:    shldq %cl, %rdx, %r14 -; SSE-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1440(%rsp,%rsi), %r10 -; SSE-NEXT:    movq 1448(%rsp,%rsi), %rdx -; SSE-NEXT:    movq %rdx, %r14 -; SSE-NEXT:    shldq %cl, %r10, %r14 -; SSE-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1312(%rsp,%rsi), %r14 -; SSE-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq 1320(%rsp,%rsi), %rbp -; SSE-NEXT:    movq %rbp, %r12 -; SSE-NEXT:    shldq %cl, %r14, %r12 -; SSE-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, (%rsp) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq 1064(%rsp,%rsi), %rbx -; SSE-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %rbp, %r14 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    shldq %cl, %rdx, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    shldq %cl, %rdx, %r11 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, %rdx -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, %r9 -; SSE-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, %rbp -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, %r9 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r15, %r13 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r12, %r15 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; SSE-NEXT:    shldq %cl, %r12, %r10 -; SSE-NEXT:    andq 384(%rdi), %r10 -; SSE-NEXT:    andq 128(%rdi), %r15 -; SSE-NEXT:    andq 320(%rdi), %r13 -; SSE-NEXT:    andq 64(%rdi), %rax -; SSE-NEXT:    orq %r10, %r15 -; SSE-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    orq %r13, %rax -; SSE-NEXT:    andq 448(%rdi), %r9 -; SSE-NEXT:    andq 192(%rdi), %rbp -; SSE-NEXT:    orq %r9, %rbp -; SSE-NEXT:    orq %rax, %rbp -; SSE-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    andq 288(%rdi), %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; SSE-NEXT:    andq 32(%rdi), %r9 -; SSE-NEXT:    andq 416(%rdi), %rdx -; SSE-NEXT:    andq 160(%rdi), %r11 -; SSE-NEXT:    orq %r8, %r9 -; SSE-NEXT:    orq %rdx, %r11 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    andq 352(%rdi), %rdx -; SSE-NEXT:    orq %r9, %r11 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 96(%rdi), %rax -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    movq %rax, %rdx -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 480(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 224(%rdi), %r8 -; SSE-NEXT:    orq %rax, %r8 -; SSE-NEXT:    orq %rdx, %r8 -; SSE-NEXT:    andq 272(%rdi), %r14 -; SSE-NEXT:    orq %r11, %r8 -; SSE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 16(%rdi), %rax -; SSE-NEXT:    orq %r14, %rax -; SSE-NEXT:    movq %rax, %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    andq 400(%rdi), %rdx -; SSE-NEXT:    movq 
{{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 144(%rdi), %rax -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    orq %r8, %rax -; SSE-NEXT:    movq %rax, %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; SSE-NEXT:    andq 336(%rdi), %r9 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 80(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    andq 464(%rdi), %rdx -; SSE-NEXT:    orq %r9, %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; SSE-NEXT:    andq 208(%rdi), %r11 -; SSE-NEXT:    orq %rdx, %r11 -; SSE-NEXT:    orq %rax, %r11 -; SSE-NEXT:    orq %r8, %r11 -; SSE-NEXT:    movq (%rsp), %rdx # 8-byte Reload -; SSE-NEXT:    andq 304(%rdi), %rdx -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 48(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; SSE-NEXT:    andq 432(%rdi), %r9 -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    movq %rax, %r10 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 176(%rdi), %r8 -; SSE-NEXT:    orq %r9, %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; SSE-NEXT:    andq 368(%rdi), %r9 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 112(%rdi), %rax -; SSE-NEXT:    orq %r10, %r8 -; SSE-NEXT:    movq %r8, %r10 -; SSE-NEXT:    orq %r9, %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 496(%rdi), %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; SSE-NEXT:    andq 240(%rdi), %rbp -; SSE-NEXT:    orq %r8, %rbp -; SSE-NEXT:    orq %rax, %rbp -; SSE-NEXT:    orq %r10, %rbp -; SSE-NEXT:    orq %r11, %rbp -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 392(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; SSE-NEXT:    andq 136(%rdi), %r12 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    andq 328(%rdi), %rdx -; SSE-NEXT:    orq %rax, %r12 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 72(%rdi), %rax -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    movq %rax, %rdx -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 456(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; SSE-NEXT:    andq 200(%rdi), %r13 -; SSE-NEXT:    orq %rax, %r13 -; SSE-NEXT:    orq %rdx, %r13 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    andq 296(%rdi), %rdx -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 40(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 424(%rdi), %r8 -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    movq %rax, %r9 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    andq 168(%rdi), %rdx -; SSE-NEXT:    orq %r8, %rdx -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 360(%rdi), %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 104(%rdi), %rax -; SSE-NEXT:    orq %r9, %rdx -; SSE-NEXT:    orq %r8, %rax -; SSE-NEXT:    movq %rax, %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 488(%rdi), %rax -; SSE-NEXT:    movq 
{{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; SSE-NEXT:    andq 232(%rdi), %r15 -; SSE-NEXT:    orq %rax, %r15 -; SSE-NEXT:    orq %r8, %r15 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 280(%rdi), %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 24(%rdi), %rax -; SSE-NEXT:    orq %rdx, %r15 -; SSE-NEXT:    orq %r8, %rax -; SSE-NEXT:    movq %rax, %r10 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 408(%rdi), %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 152(%rdi), %rax -; SSE-NEXT:    orq %r8, %rax -; SSE-NEXT:    orq %r10, %rax -; SSE-NEXT:    movq %rax, %r10 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; SSE-NEXT:    andq 344(%rdi), %r11 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 88(%rdi), %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 472(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; SSE-NEXT:    andq 216(%rdi), %r14 -; SSE-NEXT:    orq %r11, %r8 -; SSE-NEXT:    orq %rax, %r14 -; SSE-NEXT:    orq %r8, %r14 -; SSE-NEXT:    orq %r10, %r14 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; SSE-NEXT:    andq 312(%rdi), %r11 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; SSE-NEXT:    andq 56(%rdi), %r10 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 440(%rdi), %r8 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; SSE-NEXT:    andq 184(%rdi), %r9 -; SSE-NEXT:    orq %r11, %r10 -; SSE-NEXT:    orq %r8, %r9 -; SSE-NEXT:    orq %r10, %r9 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; SSE-NEXT:    shldq %cl, %rax, %rdx -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; SSE-NEXT:    andq 376(%rdi), %r10 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE-NEXT:    andq 120(%rdi), %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; SSE-NEXT:    andq 504(%rdi), %r11 -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; SSE-NEXT:    andq 248(%rdi), %r8 -; SSE-NEXT:    orq %r10, %rax -; SSE-NEXT:    movq %rax, %r10 -; SSE-NEXT:    orq %r11, %r8 -; SSE-NEXT:    movq 1056(%rsp,%rsi), %rax -; SSE-NEXT:    shldq %cl, %rax, %rbx -; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shlq %cl, %rax -; SSE-NEXT:    orq %r10, %r8 -; SSE-NEXT:    orq %r9, %r8 -; SSE-NEXT:    andq 256(%rdi), %rdx -; SSE-NEXT:    orq %r14, %r8 -; SSE-NEXT:    andq (%rdi), %rax -; SSE-NEXT:    orq %rdx, %rax -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; SSE-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; SSE-NEXT:    orq %rbp, %rax -; SSE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; SSE-NEXT:    andq 264(%rdi), %rcx -; SSE-NEXT:    andq 8(%rdi), %rbx -; SSE-NEXT:    orq %rcx, %rbx -; SSE-NEXT:    orq %r12, %rbx -; SSE-NEXT:    orq %r13, %rbx -; SSE-NEXT:    orq %r15, %rbx -; SSE-NEXT:    orq %r8, %rbx -; SSE-NEXT:    orq %rax, %rbx -; SSE-NEXT:    setne %al -; SSE-NEXT:    addq $1576, %rsp # imm = 0x628 -; SSE-NEXT:    popq %rbx -; SSE-NEXT:    popq %r12 -; SSE-NEXT:    
popq %r13 -; SSE-NEXT:    popq %r14 -; SSE-NEXT:    popq %r15 -; SSE-NEXT:    popq %rbp -; SSE-NEXT:    retq -; -; AVX2-LABEL: test_ne_i4096: -; AVX2:       # %bb.0: -; AVX2-NEXT:    pushq %rbp -; AVX2-NEXT:    pushq %r15 -; AVX2-NEXT:    pushq %r14 -; AVX2-NEXT:    pushq %r13 -; AVX2-NEXT:    pushq %r12 -; AVX2-NEXT:    pushq %rbx -; AVX2-NEXT:    subq $1560, %rsp # imm = 0x618 -; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    movl %esi, %eax -; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    andl $4032, %eax # imm = 0xFC0 -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [1,0,0,0] -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    andl $63, %ecx -; AVX2-NEXT:    shrl $3, %eax -; AVX2-NEXT:    negl %eax -; AVX2-NEXT:    movslq %eax, %rsi -; AVX2-NEXT:    movq 1280(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1288(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1536(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1544(%rsp,%rsi), %rax -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1152(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1160(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1408(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1416(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:   
 movq 1216(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, (%rsp) # 8-byte Spill -; AVX2-NEXT:    movq 1224(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1472(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1480(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1088(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1096(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1344(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1352(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1248(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1256(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1504(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1512(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1120(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1128(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1376(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1384(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1184(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1192(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1440(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1448(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1056(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1064(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:   
 movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1312(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1320(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1264(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1272(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1520(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1528(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1136(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1144(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1392(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1400(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1200(%rsp,%rsi), %r11 -; AVX2-NEXT:    movq 1208(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %r11, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1456(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1464(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1072(%rsp,%rsi), %r12 -; AVX2-NEXT:    movq 1080(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %r12, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1328(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1336(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rdx, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1232(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1240(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rax, %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1488(%rsp,%rsi), %rbp -; AVX2-NEXT:    movq 1496(%rsp,%rsi), %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rbp, %rax -; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1104(%rsp,%rsi), %rax -; AVX2-NEXT:    movq 
1112(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    shldq %cl, %rax, %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1360(%rsp,%rsi), %r10 -; AVX2-NEXT:    movq 1368(%rsp,%rsi), %r8 -; AVX2-NEXT:    movq %r8, %rdx -; AVX2-NEXT:    shldq %cl, %r10, %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1168(%rsp,%rsi), %r9 -; AVX2-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1176(%rsp,%rsi), %rbx -; AVX2-NEXT:    movq %rbx, %rdx -; AVX2-NEXT:    shldq %cl, %r9, %rdx -; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1424(%rsp,%rsi), %r9 -; AVX2-NEXT:    movq 1432(%rsp,%rsi), %rdx -; AVX2-NEXT:    movq %rdx, %r14 -; AVX2-NEXT:    shldq %cl, %r9, %r14 -; AVX2-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1296(%rsp,%rsi), %r15 -; AVX2-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq 1304(%rsp,%rsi), %r14 -; AVX2-NEXT:    movq %r14, %r13 -; AVX2-NEXT:    shldq %cl, %r15, %r13 -; AVX2-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, (%rsp) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq 1048(%rsp,%rsi), %rdx -; AVX2-NEXT:    shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    shldq %cl, %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX2-NEXT:    
shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %rbx -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %r11 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %r12 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %r13 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %rbp -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r14, %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, %r14 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r15, %r9 -; AVX2-NEXT:    andq 384(%rdi), %r9 -; AVX2-NEXT:    andq 128(%rdi), %r14 -; AVX2-NEXT:    andq 320(%rdi), %r10 -; AVX2-NEXT:    orq %r9, %r14 -; AVX2-NEXT:    movq %r14, %r15 -; AVX2-NEXT:    andq 64(%rdi), %rax -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    andq 448(%rdi), %rbp -; AVX2-NEXT:    andq 192(%rdi), %r13 -; AVX2-NEXT:    orq %rbp, %r13 -; AVX2-NEXT:    orq %rax, %r13 -; AVX2-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT:    andq 288(%rdi), %r8 -; AVX2-NEXT:    andq 32(%rdi), %r12 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 416(%rdi), %rax -; AVX2-NEXT:    orq %r8, %r12 -; AVX2-NEXT:    andq 160(%rdi), %r11 -; AVX2-NEXT:    orq %rax, %r11 -; AVX2-NEXT:    andq 352(%rdi), %rbx -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 96(%rdi), %rax -; AVX2-NEXT:    orq %r12, %r11 -; AVX2-NEXT:    orq %rbx, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    andq 480(%rdi), %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX2-NEXT:    andq 224(%rdi), %r13 -; AVX2-NEXT:    orq %r10, %r13 -; AVX2-NEXT:    orq %rax, %r13 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 272(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 16(%rdi), %rax -; AVX2-NEXT:    orq %r11, %r13 -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    movq %rax, %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX2-NEXT:    andq 400(%rdi), %r9 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 144(%rdi), %rax -; AVX2-NEXT:    orq %r9, %rax -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    movq %rax, %r9 -; 
AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    andq 336(%rdi), %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 80(%rdi), %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 464(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX2-NEXT:    andq 208(%rdi), %r11 -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    orq %r8, %r11 -; AVX2-NEXT:    orq %rax, %r11 -; AVX2-NEXT:    orq %r9, %r11 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX2-NEXT:    andq 304(%rdi), %r9 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 48(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    andq 432(%rdi), %r10 -; AVX2-NEXT:    movq (%rsp), %rax # 8-byte Reload -; AVX2-NEXT:    andq 176(%rdi), %rax -; AVX2-NEXT:    orq %r9, %r8 -; AVX2-NEXT:    movq %r8, %r9 -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 368(%rdi), %r8 -; AVX2-NEXT:    orq %r9, %rax -; AVX2-NEXT:    movq %rax, %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 112(%rdi), %rax -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 496(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX2-NEXT:    andq 240(%rdi), %r9 -; AVX2-NEXT:    orq %r8, %r9 -; AVX2-NEXT:    orq %rax, %r9 -; AVX2-NEXT:    orq %r10, %r9 -; AVX2-NEXT:    orq %r11, %r9 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    andq 392(%rdi), %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; AVX2-NEXT:    andq 136(%rdi), %rbp -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 328(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 72(%rdi), %rax -; AVX2-NEXT:    orq %r10, %rbp -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    movq %rax, %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 456(%rdi), %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; AVX2-NEXT:    andq 200(%rdi), %r12 -; AVX2-NEXT:    orq %rax, %r12 -; AVX2-NEXT:    orq %r8, %r12 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    andq 296(%rdi), %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 40(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX2-NEXT:    andq 424(%rdi), %r11 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 168(%rdi), %rax -; AVX2-NEXT:    orq %r10, %r8 -; AVX2-NEXT:    movq %r8, %r10 -; AVX2-NEXT:    orq %r11, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 360(%rdi), %r8 -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    movq %rax, %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 104(%rdi), %rax -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    movq %rax, %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 488(%rdi), %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX2-NEXT:    andq 232(%rdi), 
%r14 -; AVX2-NEXT:    orq %rax, %r14 -; AVX2-NEXT:    orq %r8, %r14 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 280(%rdi), %r8 -; AVX2-NEXT:    orq %r10, %r14 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 24(%rdi), %rax -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    movq %rax, %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 408(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 152(%rdi), %rax -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    movq %rax, %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX2-NEXT:    andq 344(%rdi), %r11 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 88(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 472(%rdi), %rax -; AVX2-NEXT:    orq %r11, %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX2-NEXT:    andq 216(%rdi), %rbx -; AVX2-NEXT:    orq %rax, %rbx -; AVX2-NEXT:    orq %r8, %rbx -; AVX2-NEXT:    orq %r10, %rbx -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 312(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 56(%rdi), %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    andq 440(%rdi), %r10 -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    movq %rax, %r11 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 184(%rdi), %r8 -; AVX2-NEXT:    orq %r10, %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    andq 376(%rdi), %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 120(%rdi), %rax -; AVX2-NEXT:    orq %r11, %r8 -; AVX2-NEXT:    movq %r8, %r11 -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    movq %rax, %r10 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    andq 504(%rdi), %r8 -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT:    andq 248(%rdi), %rax -; AVX2-NEXT:    orq %r8, %rax -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT:    shldq %cl, %r8, %r10 -; AVX2-NEXT:    orq %r11, %rax -; AVX2-NEXT:    movq 1040(%rsp,%rsi), %rsi -; AVX2-NEXT:    orq %rbx, %rax -; AVX2-NEXT:    movq %rax, %r8 -; AVX2-NEXT:    shlxq %rcx, %rsi, %rax -; AVX2-NEXT:    andq 256(%rdi), %r10 -; AVX2-NEXT:    andq (%rdi), %rax -; AVX2-NEXT:    orq %r10, %rax -; AVX2-NEXT:    orq %r15, %rax -; AVX2-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; AVX2-NEXT:    orq %r13, %rax -; AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx -; AVX2-NEXT:    shldq %cl, %rsi, %rdx -; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX2-NEXT:    andq 264(%rdi), %rcx -; AVX2-NEXT:    andq 8(%rdi), %rdx -; AVX2-NEXT:    orq %r9, %rax -; AVX2-NEXT:    orq %rcx, %rdx -; AVX2-NEXT:    orq %rbp, %rdx -; AVX2-NEXT:    orq %r12, %rdx -; AVX2-NEXT:    orq %r14, %rdx -; AVX2-NEXT:    orq %r8, %rdx -; AVX2-NEXT:    orq %rax, %rdx -; AVX2-NEXT:    setne %al -; AVX2-NEXT:    addq $1560, %rsp # imm = 0x618 -; AVX2-NEXT:    popq %rbx -; AVX2-NEXT:    popq 
%r12 -; AVX2-NEXT:    popq %r13 -; AVX2-NEXT:    popq %r14 -; AVX2-NEXT:    popq %r15 -; AVX2-NEXT:    popq %rbp -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: test_ne_i4096: -; AVX512:       # %bb.0: -; AVX512-NEXT:    pushq %rbp -; AVX512-NEXT:    pushq %r15 -; AVX512-NEXT:    pushq %r14 -; AVX512-NEXT:    pushq %r13 -; AVX512-NEXT:    pushq %r12 -; AVX512-NEXT:    pushq %rbx -; AVX512-NEXT:    subq $1560, %rsp # imm = 0x618 -; AVX512-NEXT:    movl %esi, %ecx -; AVX512-NEXT:    movl %esi, %eax -; AVX512-NEXT:    andl $4032, %eax # imm = 0xFC0 -; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [1,0,0,0] -; AVX512-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    andl $63, %ecx -; AVX512-NEXT:    shrl $3, %eax -; AVX512-NEXT:    negl %eax -; AVX512-NEXT:    movslq %eax, %rsi -; AVX512-NEXT:    movq 1280(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1288(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1536(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1544(%rsp,%rsi), %rax -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1152(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1160(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1408(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1416(%rsp,%rsi), 
%rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1216(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, (%rsp) # 8-byte Spill -; AVX512-NEXT:    movq 1224(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1472(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1480(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1088(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1096(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1344(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1352(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1248(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1256(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1504(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1512(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1120(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1128(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1376(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1384(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1184(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1192(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1440(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1448(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq 
%rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1056(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1064(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1312(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1320(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1264(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1272(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1520(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1528(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1136(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1144(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1392(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1400(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1200(%rsp,%rsi), %r10 -; AVX512-NEXT:    movq 1208(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %r10, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1456(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1464(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1072(%rsp,%rsi), %r14 -; AVX512-NEXT:    movq 1080(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %r14, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1328(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1336(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rdx, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1232(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1240(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 
8-byte Spill -; AVX512-NEXT:    shldq %cl, %rax, %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1488(%rsp,%rsi), %r12 -; AVX512-NEXT:    movq 1496(%rsp,%rsi), %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %r12, %rax -; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1104(%rsp,%rsi), %rax -; AVX512-NEXT:    movq 1112(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    shldq %cl, %rax, %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1360(%rsp,%rsi), %r11 -; AVX512-NEXT:    movq 1368(%rsp,%rsi), %rbx -; AVX512-NEXT:    movq %rbx, %rdx -; AVX512-NEXT:    shldq %cl, %r11, %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1168(%rsp,%rsi), %r9 -; AVX512-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1176(%rsp,%rsi), %r8 -; AVX512-NEXT:    movq %r8, %rdx -; AVX512-NEXT:    shldq %cl, %r9, %rdx -; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1424(%rsp,%rsi), %r9 -; AVX512-NEXT:    movq 1432(%rsp,%rsi), %rdx -; AVX512-NEXT:    movq %rdx, %r15 -; AVX512-NEXT:    shldq %cl, %r9, %r15 -; AVX512-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1296(%rsp,%rsi), %rbp -; AVX512-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq 1304(%rsp,%rsi), %r15 -; AVX512-NEXT:    movq %r15, %r13 -; AVX512-NEXT:    shldq %cl, %rbp, %r13 -; AVX512-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, (%rsp) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    shldq %cl, %r8, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq 1048(%rsp,%rsi), %rdx -; AVX512-NEXT:    shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %rbx -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %r14 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %r13 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %r12 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %r15, %r11 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %rbp, %r15 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %rbp, %r9 -; AVX512-NEXT:    andq 384(%rdi), %r9 -; AVX512-NEXT:    andq 128(%rdi), %r15 -; AVX512-NEXT:    orq %r9, %r15 -; AVX512-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT:    andq 320(%rdi), %r11 -; AVX512-NEXT:    andq 64(%rdi), %rax -; AVX512-NEXT:    orq %r11, %rax -; AVX512-NEXT:    andq 448(%rdi), %r12 -; AVX512-NEXT:    andq 192(%rdi), %r13 -; AVX512-NEXT:    orq %r12, %r13 -; AVX512-NEXT:    orq %rax, %r13 -; AVX512-NEXT:    andq 288(%rdi), %r8 -; AVX512-NEXT:    andq 32(%rdi), %r14 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 416(%rdi), %rax -; AVX512-NEXT:    orq %r8, %r14 -; AVX512-NEXT:    andq 160(%rdi), %r10 -; AVX512-NEXT:    orq %rax, %r10 -; AVX512-NEXT:    andq 352(%rdi), %rbx -; AVX512-NEXT:    orq %r14, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 96(%rdi), %rax -; AVX512-NEXT:    orq %rbx, %rax -; AVX512-NEXT:    movq %rax, %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 480(%rdi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; AVX512-NEXT:    
andq 224(%rdi), %r15 -; AVX512-NEXT:    orq %rax, %r15 -; AVX512-NEXT:    orq %r8, %r15 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 272(%rdi), %r8 -; AVX512-NEXT:    orq %r10, %r15 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 16(%rdi), %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq %rax, %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX512-NEXT:    andq 400(%rdi), %r9 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 144(%rdi), %rax -; AVX512-NEXT:    orq %r9, %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq %rax, %r9 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX512-NEXT:    andq 336(%rdi), %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 80(%rdi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 464(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX512-NEXT:    andq 208(%rdi), %r11 -; AVX512-NEXT:    orq %r10, %rax -; AVX512-NEXT:    orq %r8, %r11 -; AVX512-NEXT:    orq %rax, %r11 -; AVX512-NEXT:    orq %r9, %r11 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX512-NEXT:    andq 304(%rdi), %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 48(%rdi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX512-NEXT:    andq 432(%rdi), %r9 -; AVX512-NEXT:    movq (%rsp), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 176(%rdi), %r8 -; AVX512-NEXT:    orq %r10, %rax -; AVX512-NEXT:    movq %rax, %r10 -; AVX512-NEXT:    orq %r9, %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX512-NEXT:    andq 368(%rdi), %r9 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 112(%rdi), %rax -; AVX512-NEXT:    orq %r10, %r8 -; AVX512-NEXT:    movq %r8, %r10 -; AVX512-NEXT:    orq %r9, %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 496(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX512-NEXT:    andq 240(%rdi), %r9 -; AVX512-NEXT:    orq %r8, %r9 -; AVX512-NEXT:    orq %rax, %r9 -; AVX512-NEXT:    orq %r10, %r9 -; AVX512-NEXT:    orq %r11, %r9 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX512-NEXT:    andq 392(%rdi), %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; AVX512-NEXT:    andq 136(%rdi), %rbp -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 328(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 72(%rdi), %rax -; AVX512-NEXT:    orq %r10, %rbp -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq %rax, %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 456(%rdi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; AVX512-NEXT:    andq 200(%rdi), %r12 -; AVX512-NEXT:    orq %rax, %r12 -; AVX512-NEXT:    orq %r8, %r12 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 296(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 40(%rdi), %rax -; 
AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq %rax, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 424(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 168(%rdi), %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    orq %r10, %rax -; AVX512-NEXT:    movq %rax, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 360(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 104(%rdi), %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq %rax, %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 488(%rdi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX512-NEXT:    andq 232(%rdi), %r14 -; AVX512-NEXT:    orq %rax, %r14 -; AVX512-NEXT:    orq %r8, %r14 -; AVX512-NEXT:    orq %r10, %r14 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 280(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 24(%rdi), %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq %rax, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 408(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 152(%rdi), %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    orq %r10, %rax -; AVX512-NEXT:    movq %rax, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX512-NEXT:    andq 344(%rdi), %r11 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 88(%rdi), %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 472(%rdi), %rax -; AVX512-NEXT:    orq %r11, %r8 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; AVX512-NEXT:    andq 216(%rdi), %rbx -; AVX512-NEXT:    orq %rax, %rbx -; AVX512-NEXT:    orq %r8, %rbx -; AVX512-NEXT:    orq %r10, %rbx -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX512-NEXT:    andq 312(%rdi), %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 56(%rdi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 440(%rdi), %r8 -; AVX512-NEXT:    orq %r10, %rax -; AVX512-NEXT:    movq %rax, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 184(%rdi), %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 376(%rdi), %r8 -; AVX512-NEXT:    orq %r10, %rax -; AVX512-NEXT:    movq %rax, %r11 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 120(%rdi), %rax -; AVX512-NEXT:    orq %r8, %rax -; AVX512-NEXT:    movq %rax, %r10 -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 504(%rdi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; AVX512-NEXT:    andq 248(%rdi), %r8 -; AVX512-NEXT:    orq %rax, %r8 -; AVX512-NEXT:    orq %r10, %r8 -; AVX512-NEXT:    orq %r11, %r8 -; AVX512-NEXT:    movq 1040(%rsp,%rsi), %rax -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; AVX512-NEXT:    movq 
{{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX512-NEXT:    shldq %cl, %rsi, %r10 -; AVX512-NEXT:    orq %rbx, %r8 -; AVX512-NEXT:    shlxq %rcx, %rax, %rsi -; AVX512-NEXT:    andq 256(%rdi), %r10 -; AVX512-NEXT:    andq (%rdi), %rsi -; AVX512-NEXT:    orq %r10, %rsi -; AVX512-NEXT:    orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; AVX512-NEXT:    orq %r13, %rsi -; AVX512-NEXT:    orq %r15, %rsi -; AVX512-NEXT:    # kill: def $cl killed $cl killed $rcx -; AVX512-NEXT:    shldq %cl, %rax, %rdx -; AVX512-NEXT:    orq %r9, %rsi -; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512-NEXT:    andq 264(%rdi), %rax -; AVX512-NEXT:    andq 8(%rdi), %rdx -; AVX512-NEXT:    orq %rax, %rdx -; AVX512-NEXT:    orq %rbp, %rdx -; AVX512-NEXT:    orq %r12, %rdx -; AVX512-NEXT:    orq %r14, %rdx -; AVX512-NEXT:    orq %r8, %rdx -; AVX512-NEXT:    orq %rsi, %rdx -; AVX512-NEXT:    setne %al -; AVX512-NEXT:    addq $1560, %rsp # imm = 0x618 -; AVX512-NEXT:    popq %rbx -; AVX512-NEXT:    popq %r12 -; AVX512-NEXT:    popq %r13 -; AVX512-NEXT:    popq %r14 -; AVX512-NEXT:    popq %r15 -; AVX512-NEXT:    popq %rbp -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: test_ne_i4096: +; X64:       # %bb.0: +; X64-NEXT:    movl %esi, %eax +; X64-NEXT:    andl $4064, %eax # imm = 0xFE0 +; X64-NEXT:    shrl $3, %eax +; X64-NEXT:    movl (%rdi,%rax), %eax +; X64-NEXT:    btl %esi, %eax +; X64-NEXT:    setb %al +; X64-NEXT:    retq    %rem = and i32 %position, 4095    %ofs = zext nneg i32 %rem to i4096    %bit = shl nuw i4096 1, %ofs @@ -7161,8 +1827,8 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi  ; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $80, %esp -; X86-NEXT:    movzbl 12(%ebp), %ecx +; X86-NEXT:    subl $64, %esp +; X86-NEXT:    movl 12(%ebp), %ecx  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)  ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) @@ -7176,51 +1842,33 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {  ; X86-NEXT:    andb $12, %al  ; X86-NEXT:    negb %al  ; X86-NEXT:    movsbl %al, %eax -; X86-NEXT:    movl 56(%esp,%eax), %esi -; X86-NEXT:    movl 60(%esp,%eax), %edx -; X86-NEXT:    shldl %cl, %esi, %edx -; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%esp,%eax), %edi -; X86-NEXT:    movl 52(%esp,%eax), %eax -; X86-NEXT:    shldl %cl, %eax, %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    shldl %cl, %edi, %eax -; X86-NEXT:    movl 8(%ebp), %ebx -; X86-NEXT:    shll %cl, %edi -; X86-NEXT:    movl 8(%ebx), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %esi, %ecx -; X86-NEXT:    movl (%ebx), %esi -; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %edi, %esi -; X86-NEXT:    orl %ecx, %esi -; X86-NEXT:    movl 12(%ebx), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT:    movl %eax, %edx -; X86-NEXT:    movl 4(%ebx), %ebx -; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    andl %ebx, %eax -; X86-NEXT:    orl %ecx, %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    notl %ecx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; 
X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    movl 40(%esp,%eax), %edx +; X86-NEXT:    movl 44(%esp,%eax), %esi +; X86-NEXT:    shldl %cl, %edx, %esi +; X86-NEXT:    movl 32(%esp,%eax), %edi +; X86-NEXT:    movl 36(%esp,%eax), %ebx +; X86-NEXT:    shldl %cl, %ebx, %edx +; X86-NEXT:    shldl %cl, %edi, %ebx  ; X86-NEXT:    notl %ebx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT:    notl %edx -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT:    notl %edi -; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT:    orl %esi, %eax  ; X86-NEXT:    movl 16(%ebp), %eax  ; X86-NEXT:    movl (%eax), %eax -; X86-NEXT:    movl 8(%ebp), %esi -; X86-NEXT:    movl %ebx, 8(%esi) -; X86-NEXT:    movl %ecx, 12(%esi) -; X86-NEXT:    movl %edi, (%esi) -; X86-NEXT:    movl %edx, 4(%esi) -; X86-NEXT:    je .LBB22_2 +; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 8(%ebp), %eax +; X86-NEXT:    andl %ebx, 4(%eax) +; X86-NEXT:    shll %cl, %edi +; X86-NEXT:    notl %edi +; X86-NEXT:    movl %ecx, %ebx +; X86-NEXT:    andl $96, %ebx +; X86-NEXT:    shrl $3, %ebx +; X86-NEXT:    movl (%eax,%ebx), %ebx +; X86-NEXT:    andl %edi, (%eax) +; X86-NEXT:    notl %esi +; X86-NEXT:    andl %esi, 12(%eax) +; X86-NEXT:    notl %edx +; X86-NEXT:    andl %edx, 8(%eax) +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    btl %ecx, %ebx +; X86-NEXT:    jae .LBB22_2  ; X86-NEXT:  # %bb.1:  ; X86-NEXT:    xorl %eax, %eax  ; X86-NEXT:  .LBB22_2: @@ -7242,52 +1890,75 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {  ; SSE-NEXT:    testb $64, %cl  ; SSE-NEXT:    cmovneq %rsi, %r8  ; SSE-NEXT:    cmovneq %rax, %rsi -; SSE-NEXT:    movq (%rdi), %rcx -; SSE-NEXT:    movq 8(%rdi), %r9 -; SSE-NEXT:    movq %r9, %r10 -; SSE-NEXT:    andq %r8, %r10  ; SSE-NEXT:    notq %r8 -; SSE-NEXT:    movq %rcx, %r11 -; SSE-NEXT:    andq %rsi, %r11  ; SSE-NEXT:    notq %rsi -; SSE-NEXT:    andq %r9, %r8 -; SSE-NEXT:    andq %rcx, %rsi -; SSE-NEXT:    orq %r10, %r11 -; SSE-NEXT:    jne .LBB22_2 +; SSE-NEXT:    movl %ecx, %r9d +; SSE-NEXT:    andl $96, %r9d +; SSE-NEXT:    shrl $3, %r9d +; SSE-NEXT:    movl (%rdi,%r9), %r9d +; SSE-NEXT:    btl %ecx, %r9d +; SSE-NEXT:    jb .LBB22_2  ; SSE-NEXT:  # %bb.1:  ; SSE-NEXT:    movl (%rdx), %eax  ; SSE-NEXT:  .LBB22_2: -; SSE-NEXT:    movq %rsi, (%rdi) -; SSE-NEXT:    movq %r8, 8(%rdi) +; SSE-NEXT:    andq %rsi, (%rdi) +; SSE-NEXT:    andq %r8, 8(%rdi)  ; SSE-NEXT:    # kill: def $eax killed $eax killed $rax  ; SSE-NEXT:    retq  ; -; AVX-LABEL: reset_multiload_i128: -; AVX:       # %bb.0: -; AVX-NEXT:    movl %esi, %ecx -; AVX-NEXT:    movl $1, %esi -; AVX-NEXT:    xorl %r8d, %r8d -; AVX-NEXT:    shldq %cl, %rsi, %r8 -; AVX-NEXT:    xorl %eax, %eax -; AVX-NEXT:    shlxq %rcx, %rsi, %r9 -; AVX-NEXT:    testb $64, %cl -; AVX-NEXT:    cmovneq %r9, %r8 -; AVX-NEXT:    cmovneq %rax, %r9 -; AVX-NEXT:    movq (%rdi), %r10 -; AVX-NEXT:    movq 8(%rdi), %r11 -; AVX-NEXT:    andnq %r11, %r8, %rcx -; AVX-NEXT:    andq %r8, %r11 -; AVX-NEXT:    andnq %r10, %r9, %rsi -; AVX-NEXT:    andq %r9, %r10 -; AVX-NEXT:    orq %r11, %r10 -; AVX-NEXT:    jne .LBB22_2 -; AVX-NEXT:  # %bb.1: -; AVX-NEXT:    movl (%rdx), %eax -; AVX-NEXT:  .LBB22_2: -; AVX-NEXT:    movq %rsi, (%rdi) -; AVX-NEXT:    movq %rcx, 8(%rdi) -; AVX-NEXT:    # kill: def $eax killed $eax killed $rax -; AVX-NEXT:    retq +; 
AVX2-LABEL: reset_multiload_i128: +; AVX2:       # %bb.0: +; AVX2-NEXT:    movl %esi, %ecx +; AVX2-NEXT:    xorl %eax, %eax +; AVX2-NEXT:    movl $1, %r8d +; AVX2-NEXT:    xorl %esi, %esi +; AVX2-NEXT:    shldq %cl, %r8, %rsi +; AVX2-NEXT:    shlxq %rcx, %r8, %r8 +; AVX2-NEXT:    testb $64, %cl +; AVX2-NEXT:    cmovneq %r8, %rsi +; AVX2-NEXT:    cmovneq %rax, %r8 +; AVX2-NEXT:    notq %rsi +; AVX2-NEXT:    notq %r8 +; AVX2-NEXT:    movl %ecx, %r9d +; AVX2-NEXT:    andl $96, %r9d +; AVX2-NEXT:    shrl $3, %r9d +; AVX2-NEXT:    movl (%rdi,%r9), %r9d +; AVX2-NEXT:    btl %ecx, %r9d +; AVX2-NEXT:    jb .LBB22_2 +; AVX2-NEXT:  # %bb.1: +; AVX2-NEXT:    movl (%rdx), %eax +; AVX2-NEXT:  .LBB22_2: +; AVX2-NEXT:    andq %r8, (%rdi) +; AVX2-NEXT:    andq %rsi, 8(%rdi) +; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax +; AVX2-NEXT:    retq +; +; AVX512-LABEL: reset_multiload_i128: +; AVX512:       # %bb.0: +; AVX512-NEXT:    movl %esi, %ecx +; AVX512-NEXT:    movl $1, %r8d +; AVX512-NEXT:    xorl %esi, %esi +; AVX512-NEXT:    shldq %cl, %r8, %rsi +; AVX512-NEXT:    xorl %eax, %eax +; AVX512-NEXT:    shlxq %rcx, %r8, %r8 +; AVX512-NEXT:    testb $64, %cl +; AVX512-NEXT:    cmovneq %r8, %rsi +; AVX512-NEXT:    cmovneq %rax, %r8 +; AVX512-NEXT:    notq %rsi +; AVX512-NEXT:    notq %r8 +; AVX512-NEXT:    movl %ecx, %r9d +; AVX512-NEXT:    andl $96, %r9d +; AVX512-NEXT:    shrl $3, %r9d +; AVX512-NEXT:    movl (%rdi,%r9), %r9d +; AVX512-NEXT:    btl %ecx, %r9d +; AVX512-NEXT:    jb .LBB22_2 +; AVX512-NEXT:  # %bb.1: +; AVX512-NEXT:    movl (%rdx), %eax +; AVX512-NEXT:  .LBB22_2: +; AVX512-NEXT:    andq %r8, (%rdi) +; AVX512-NEXT:    andq %rsi, 8(%rdi) +; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax +; AVX512-NEXT:    retq    %rem = and i32 %position, 127    %ofs = zext nneg i32 %rem to i128    %bit = shl nuw i128 1, %ofs diff --git a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll index 065710f..8576f8f 100644 --- a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll @@ -3,6 +3,9 @@  ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel  | FileCheck %s --check-prefixes=X64,FASTISEL-X64  ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=0 -fast-isel=0  | FileCheck %s --check-prefixes=X86,SDAG-X86  ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=0 -fast-isel=0  | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-SINCOS-STRET +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-NOSINCOS-STRET +  ; TODO: The below RUN line will fails GISEL selection and will fallback to DAG selection due to lack of support for loads/stores in i686 mode, support is expected soon enough, for this reason the llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir test is added for now because of the lack of support for i686 in GlobalISel.  
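For context on the macOS RUN lines added just above: the two new targets differ only in deployment target, and the check prefixes they introduce differ accordingly. MACOS-SINCOS-STRET (macosx10.9.0) expects a single ___sincosf_stret / ___sincos_stret call that returns both results, while MACOS-NOSINCOS-STRET (macosx10.8.0) expects separate _sinf/_cosf (or _sin/_cos, _sinl/_cosl) libcalls. A minimal sketch of the kind of IR these prefixes exercise, assuming only the { float, float } @llvm.sincos.f32 form implied by the test_sincos_f32 signature shown below; the function name @sincos_sketch is illustrative and not part of the diff:
; Illustrative sketch only -- not taken from isel-llvm.sincos.ll.
; A single llvm.sincos call whose {sin, cos} pair is lowered either to one
; __sincosf_stret libcall (MACOS-SINCOS-STRET) or to separate sinf/cosf
; libcalls (MACOS-NOSINCOS-STRET).
define { float, float } @sincos_sketch(float %x) nounwind {
  %res = call { float, float } @llvm.sincos.f32(float %x)
  ret { float, float } %res
}
declare { float, float } @llvm.sincos.f32(float)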
; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X86  ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 @@ -34,6 +37,29 @@ define { float, float } @test_sincos_f32(float %Val) nounwind {  ; X64-NEXT:    popq %rax  ; X64-NEXT:    retq  ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f32: +; MACOS-SINCOS-STRET:       ## %bb.0: +; MACOS-SINCOS-STRET-NEXT:    pushq %rax +; MACOS-SINCOS-STRET-NEXT:    callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; MACOS-SINCOS-STRET-NEXT:    popq %rax +; MACOS-SINCOS-STRET-NEXT:    retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f32: +; MACOS-NOSINCOS-STRET:       ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT:    pushq %rax +; MACOS-NOSINCOS-STRET-NEXT:    movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _sinf +; MACOS-NOSINCOS-STRET-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT:    callq _cosf +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, %xmm1 +; MACOS-NOSINCOS-STRET-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT:    popq %rax +; MACOS-NOSINCOS-STRET-NEXT:    retq +;  ; GISEL-X86-LABEL: test_sincos_f32:  ; GISEL-X86:       # %bb.0:  ; GISEL-X86-NEXT:    subl $28, %esp @@ -93,6 +119,28 @@ define { double, double } @test_sincos_f64(double %Val) nounwind  {  ; X64-NEXT:    addq $24, %rsp  ; X64-NEXT:    retq  ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f64: +; MACOS-SINCOS-STRET:       ## %bb.0: +; MACOS-SINCOS-STRET-NEXT:    pushq %rax +; MACOS-SINCOS-STRET-NEXT:    callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT:    popq %rax +; MACOS-SINCOS-STRET-NEXT:    retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f64: +; MACOS-NOSINCOS-STRET:       ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT:    subq $24, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _sin +; MACOS-NOSINCOS-STRET-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = mem[0],zero +; MACOS-NOSINCOS-STRET-NEXT:    callq _cos +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, %xmm1 +; MACOS-NOSINCOS-STRET-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = mem[0],zero +; MACOS-NOSINCOS-STRET-NEXT:    addq $24, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    retq +;  ; GISEL-X86-LABEL: test_sincos_f64:  ; GISEL-X86:       # %bb.0:  ; GISEL-X86-NEXT:    subl $44, %esp @@ -153,6 +201,40 @@ define { x86_fp80, x86_fp80 } @test_sincos_f80(x86_fp80 %Val) nounwind {  ; X64-NEXT:    addq $56, %rsp  ; X64-NEXT:    retq  ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f80: +; MACOS-SINCOS-STRET:       ## %bb.0: +; MACOS-SINCOS-STRET-NEXT:    subq $40, %rsp +; MACOS-SINCOS-STRET-NEXT:    fldt {{[0-9]+}}(%rsp) +; MACOS-SINCOS-STRET-NEXT:    fld %st(0) +; MACOS-SINCOS-STRET-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-SINCOS-STRET-NEXT:    fstpt (%rsp) +; MACOS-SINCOS-STRET-NEXT:    callq _cosl +; MACOS-SINCOS-STRET-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) 
## 10-byte Folded Spill +; MACOS-SINCOS-STRET-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT:    fstpt (%rsp) +; MACOS-SINCOS-STRET-NEXT:    callq _sinl +; MACOS-SINCOS-STRET-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT:    fxch %st(1) +; MACOS-SINCOS-STRET-NEXT:    addq $40, %rsp +; MACOS-SINCOS-STRET-NEXT:    retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f80: +; MACOS-NOSINCOS-STRET:       ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT:    subq $40, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    fldt {{[0-9]+}}(%rsp) +; MACOS-NOSINCOS-STRET-NEXT:    fld %st(0) +; MACOS-NOSINCOS-STRET-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-NOSINCOS-STRET-NEXT:    fstpt (%rsp) +; MACOS-NOSINCOS-STRET-NEXT:    callq _cosl +; MACOS-NOSINCOS-STRET-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-NOSINCOS-STRET-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT:    fstpt (%rsp) +; MACOS-NOSINCOS-STRET-NEXT:    callq _sinl +; MACOS-NOSINCOS-STRET-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT:    fxch %st(1) +; MACOS-NOSINCOS-STRET-NEXT:    addq $40, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    retq +;  ; GISEL-X86-LABEL: test_sincos_f80:  ; GISEL-X86:       # %bb.0:  ; GISEL-X86-NEXT:    subl $60, %esp @@ -288,6 +370,57 @@ define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias %  ; SDAG-X64-NEXT:    popq %r14  ; SDAG-X64-NEXT:    retq  ; +; MACOS-SINCOS-STRET-LABEL: can_fold_with_call_in_chain: +; MACOS-SINCOS-STRET:       ## %bb.0: ## %entry +; MACOS-SINCOS-STRET-NEXT:    pushq %r14 +; MACOS-SINCOS-STRET-NEXT:    pushq %rbx +; MACOS-SINCOS-STRET-NEXT:    subq $40, %rsp +; MACOS-SINCOS-STRET-NEXT:    movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT:    movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT:    callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movq %r14, %rdi +; MACOS-SINCOS-STRET-NEXT:    movq %rbx, %rsi +; MACOS-SINCOS-STRET-NEXT:    callq _foo +; MACOS-SINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    movss %xmm0, (%r14) +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    movss %xmm0, (%rbx) +; MACOS-SINCOS-STRET-NEXT:    addq $40, %rsp +; MACOS-SINCOS-STRET-NEXT:    popq %rbx +; MACOS-SINCOS-STRET-NEXT:    popq %r14 +; MACOS-SINCOS-STRET-NEXT:    retq +; +; MACOS-NOSINCOS-STRET-LABEL: can_fold_with_call_in_chain: +; MACOS-NOSINCOS-STRET:       ## %bb.0: ## %entry +; MACOS-NOSINCOS-STRET-NEXT:    pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT:    pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT:    pushq %rax +; MACOS-NOSINCOS-STRET-NEXT:    movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT:    movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT:    movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _sinf +; MACOS-NOSINCOS-STRET-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT:    callq _cosf +; MACOS-NOSINCOS-STRET-NEXT:    movss %xmm0, (%rsp) ## 4-byte Spill +; 
MACOS-NOSINCOS-STRET-NEXT:    movq %r14, %rdi +; MACOS-NOSINCOS-STRET-NEXT:    movq %rbx, %rsi +; MACOS-NOSINCOS-STRET-NEXT:    callq _foo +; MACOS-NOSINCOS-STRET-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT:    movss %xmm0, (%r14) +; MACOS-NOSINCOS-STRET-NEXT:    movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT:    movss %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT:    addq $8, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    popq %rbx +; MACOS-NOSINCOS-STRET-NEXT:    popq %r14 +; MACOS-NOSINCOS-STRET-NEXT:    retq +;  ; GISEL-X86-LABEL: can_fold_with_call_in_chain:  ; GISEL-X86:       # %bb.0: # %entry  ; GISEL-X86-NEXT:    pushl %ebx diff --git a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll index 834dd78..9b02438 100644 --- a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll +++ b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll @@ -1,59 +1,213 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 5 -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu  | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu  | FileCheck -check-prefix=X86 %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu  | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 | FileCheck --check-prefix=MACOS-SINCOS-STRET %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 | FileCheck --check-prefix=MACOS-NOSINCOS-STRET %s  define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind { -; CHECK-LABEL: test_sincos_v4f32: -; CHECK:       # %bb.0: -; CHECK-NEXT:    pushl %edi -; CHECK-NEXT:    pushl %esi -; CHECK-NEXT:    subl $52, %esp -; CHECK-NEXT:    movl 84(%esp), %esi -; CHECK-NEXT:    flds 76(%esp) -; CHECK-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT:    flds 64(%esp) -; CHECK-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT:    flds 72(%esp) -; CHECK-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT:    flds 68(%esp) -; CHECK-NEXT:    movl 80(%esp), %edi -; CHECK-NEXT:    leal 40(%esp), %eax -; CHECK-NEXT:    movl %eax, 8(%esp) -; CHECK-NEXT:    leal 4(%edi), %eax -; CHECK-NEXT:    movl %eax, 4(%esp) -; CHECK-NEXT:    fstps (%esp) -; CHECK-NEXT:    calll sincosf -; CHECK-NEXT:    leal 44(%esp), %eax -; CHECK-NEXT:    movl %eax, 8(%esp) -; CHECK-NEXT:    leal 8(%edi), %eax -; CHECK-NEXT:    movl %eax, 4(%esp) -; CHECK-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT:    fstps (%esp) -; CHECK-NEXT:    calll sincosf -; CHECK-NEXT:    leal 36(%esp), %eax -; CHECK-NEXT:    movl %eax, 8(%esp) -; CHECK-NEXT:    movl %edi, 4(%esp) -; CHECK-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT:    fstps (%esp) -; CHECK-NEXT:    calll sincosf -; CHECK-NEXT:    leal 48(%esp), %eax -; CHECK-NEXT:    movl %eax, 8(%esp) -; CHECK-NEXT:    addl $12, %edi -; CHECK-NEXT:    movl %edi, 4(%esp) -; CHECK-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT:    fstps (%esp) -; CHECK-NEXT:    calll sincosf -; CHECK-NEXT:    flds 36(%esp) -; CHECK-NEXT:    flds 40(%esp) -; CHECK-NEXT:    flds 44(%esp) -; CHECK-NEXT:    flds 48(%esp) -; CHECK-NEXT:    fstps 12(%esi) -; CHECK-NEXT:    fstps 8(%esi) -; CHECK-NEXT:    fstps 4(%esi) -; CHECK-NEXT:  
  fstps (%esi) -; CHECK-NEXT:    addl $52, %esp -; CHECK-NEXT:    popl %esi -; CHECK-NEXT:    popl %edi -; CHECK-NEXT:    retl +; X86-LABEL: test_sincos_v4f32: +; X86:       # %bb.0: +; X86-NEXT:    pushl %edi +; X86-NEXT:    pushl %esi +; X86-NEXT:    subl $52, %esp +; X86-NEXT:    movl 84(%esp), %esi +; X86-NEXT:    flds 76(%esp) +; X86-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT:    flds 64(%esp) +; X86-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT:    flds 72(%esp) +; X86-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT:    flds 68(%esp) +; X86-NEXT:    movl 80(%esp), %edi +; X86-NEXT:    leal 40(%esp), %eax +; X86-NEXT:    movl %eax, 8(%esp) +; X86-NEXT:    leal 4(%edi), %eax +; X86-NEXT:    movl %eax, 4(%esp) +; X86-NEXT:    fstps (%esp) +; X86-NEXT:    calll sincosf +; X86-NEXT:    leal 44(%esp), %eax +; X86-NEXT:    movl %eax, 8(%esp) +; X86-NEXT:    leal 8(%edi), %eax +; X86-NEXT:    movl %eax, 4(%esp) +; X86-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT:    fstps (%esp) +; X86-NEXT:    calll sincosf +; X86-NEXT:    leal 36(%esp), %eax +; X86-NEXT:    movl %eax, 8(%esp) +; X86-NEXT:    movl %edi, 4(%esp) +; X86-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT:    fstps (%esp) +; X86-NEXT:    calll sincosf +; X86-NEXT:    leal 48(%esp), %eax +; X86-NEXT:    movl %eax, 8(%esp) +; X86-NEXT:    addl $12, %edi +; X86-NEXT:    movl %edi, 4(%esp) +; X86-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT:    fstps (%esp) +; X86-NEXT:    calll sincosf +; X86-NEXT:    flds 36(%esp) +; X86-NEXT:    flds 40(%esp) +; X86-NEXT:    flds 44(%esp) +; X86-NEXT:    flds 48(%esp) +; X86-NEXT:    fstps 12(%esi) +; X86-NEXT:    fstps 8(%esi) +; X86-NEXT:    fstps 4(%esi) +; X86-NEXT:    fstps (%esi) +; X86-NEXT:    addl $52, %esp +; X86-NEXT:    popl %esi +; X86-NEXT:    popl %edi +; X86-NEXT:    retl +; +; X64-LABEL: test_sincos_v4f32: +; X64:       # %bb.0: +; X64-NEXT:    pushq %r14 +; X64-NEXT:    pushq %rbx +; X64-NEXT:    subq $56, %rsp +; X64-NEXT:    movq %rsi, %rbx +; X64-NEXT:    movq %rdi, %r14 +; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT:    leaq 4(%rsp), %rdi +; X64-NEXT:    movq %rsp, %rsi +; X64-NEXT:    callq sincosf@PLT +; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT:    leaq 12(%rsp), %rdi +; X64-NEXT:    leaq 8(%rsp), %rsi +; X64-NEXT:    callq sincosf@PLT +; X64-NEXT:    leaq 28(%rsp), %rdi +; X64-NEXT:    leaq 24(%rsp), %rsi +; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT:    callq sincosf@PLT +; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT:    leaq 20(%rsp), %rdi +; X64-NEXT:    leaq 16(%rsp), %rsi +; X64-NEXT:    callq sincosf@PLT +; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; 
X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; X64-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X64-NEXT:    movups %xmm1, (%r14) +; X64-NEXT:    movups %xmm0, (%rbx) +; X64-NEXT:    addq $56, %rsp +; X64-NEXT:    popq %rbx +; X64-NEXT:    popq %r14 +; X64-NEXT:    retq +; +; MACOS-SINCOS-STRET-LABEL: test_sincos_v4f32: +; MACOS-SINCOS-STRET:       ## %bb.0: +; MACOS-SINCOS-STRET-NEXT:    pushq %r14 +; MACOS-SINCOS-STRET-NEXT:    pushq %rbx +; MACOS-SINCOS-STRET-NEXT:    subq $104, %rsp +; MACOS-SINCOS-STRET-NEXT:    movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT:    movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; MACOS-SINCOS-STRET-NEXT:    callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT:    callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT:    unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, %xmm1 +; MACOS-SINCOS-STRET-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT:    callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-SINCOS-STRET-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; MACOS-SINCOS-STRET-NEXT:    unpcklpd (%rsp), %xmm2 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT:    ## xmm2 = xmm2[0],mem[0] +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-SINCOS-STRET-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-SINCOS-STRET-NEXT:    movups %xmm1, (%r14) +; MACOS-SINCOS-STRET-NEXT:    movups %xmm2, (%rbx) +; MACOS-SINCOS-STRET-NEXT:    
addq $104, %rsp +; MACOS-SINCOS-STRET-NEXT:    popq %rbx +; MACOS-SINCOS-STRET-NEXT:    popq %r14 +; MACOS-SINCOS-STRET-NEXT:    retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v4f32: +; MACOS-NOSINCOS-STRET:       ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT:    pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT:    pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT:    subq $104, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT:    movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _cosf +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _cosf +; MACOS-NOSINCOS-STRET-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    callq _cosf +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _cosf +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-NOSINCOS-STRET-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm1 = xmm1[0],mem[0] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    callq _sinf +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    callq _sinf +; MACOS-NOSINCOS-STRET-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    callq _sinf +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    callq _sinf +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-NOSINCOS-STRET-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT:    ## xmm1 = xmm1[0],mem[0] +; 
MACOS-NOSINCOS-STRET-NEXT:    movups %xmm1, (%r14) +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    movups %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT:    addq $104, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    popq %rbx +; MACOS-NOSINCOS-STRET-NEXT:    popq %r14 +; MACOS-NOSINCOS-STRET-NEXT:    retq    %result = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x)    %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0    %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1 @@ -63,36 +217,120 @@ define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias  }  define void @test_sincos_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind { -; CHECK-LABEL: test_sincos_v2f64: -; CHECK:       # %bb.0: -; CHECK-NEXT:    pushl %edi -; CHECK-NEXT:    pushl %esi -; CHECK-NEXT:    subl $52, %esp -; CHECK-NEXT:    movl 84(%esp), %esi -; CHECK-NEXT:    fldl 72(%esp) -; CHECK-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; CHECK-NEXT:    fldl 64(%esp) -; CHECK-NEXT:    movl 80(%esp), %edi -; CHECK-NEXT:    leal 24(%esp), %eax -; CHECK-NEXT:    movl %eax, 12(%esp) -; CHECK-NEXT:    movl %edi, 8(%esp) -; CHECK-NEXT:    fstpl (%esp) -; CHECK-NEXT:    calll sincos -; CHECK-NEXT:    leal 32(%esp), %eax -; CHECK-NEXT:    movl %eax, 12(%esp) -; CHECK-NEXT:    addl $8, %edi -; CHECK-NEXT:    movl %edi, 8(%esp) -; CHECK-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload -; CHECK-NEXT:    fstpl (%esp) -; CHECK-NEXT:    calll sincos -; CHECK-NEXT:    fldl 24(%esp) -; CHECK-NEXT:    fldl 32(%esp) -; CHECK-NEXT:    fstpl 8(%esi) -; CHECK-NEXT:    fstpl (%esi) -; CHECK-NEXT:    addl $52, %esp -; CHECK-NEXT:    popl %esi -; CHECK-NEXT:    popl %edi -; CHECK-NEXT:    retl +; X86-LABEL: test_sincos_v2f64: +; X86:       # %bb.0: +; X86-NEXT:    pushl %edi +; X86-NEXT:    pushl %esi +; X86-NEXT:    subl $52, %esp +; X86-NEXT:    movl 84(%esp), %esi +; X86-NEXT:    fldl 72(%esp) +; X86-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; X86-NEXT:    fldl 64(%esp) +; X86-NEXT:    movl 80(%esp), %edi +; X86-NEXT:    leal 24(%esp), %eax +; X86-NEXT:    movl %eax, 12(%esp) +; X86-NEXT:    movl %edi, 8(%esp) +; X86-NEXT:    fstpl (%esp) +; X86-NEXT:    calll sincos +; X86-NEXT:    leal 32(%esp), %eax +; X86-NEXT:    movl %eax, 12(%esp) +; X86-NEXT:    addl $8, %edi +; X86-NEXT:    movl %edi, 8(%esp) +; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT:    fstpl (%esp) +; X86-NEXT:    calll sincos +; X86-NEXT:    fldl 24(%esp) +; X86-NEXT:    fldl 32(%esp) +; X86-NEXT:    fstpl 8(%esi) +; X86-NEXT:    fstpl (%esi) +; X86-NEXT:    addl $52, %esp +; X86-NEXT:    popl %esi +; X86-NEXT:    popl %edi +; X86-NEXT:    retl +; +; X64-LABEL: test_sincos_v2f64: +; X64:       # %bb.0: +; X64-NEXT:    pushq %r14 +; X64-NEXT:    pushq %rbx +; X64-NEXT:    subq $56, %rsp +; X64-NEXT:    movq %rsi, %rbx +; X64-NEXT:    movq %rdi, %r14 +; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT:    leaq 24(%rsp), %rdi +; X64-NEXT:    leaq 16(%rsp), %rsi +; X64-NEXT:    callq sincos@PLT +; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT:    leaq 8(%rsp), %rdi +; X64-NEXT:    movq %rsp, %rsi +; X64-NEXT:    callq sincos@PLT +; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT:    movhps {{.*#+}} xmm0 = 
xmm0[0,1],mem[0,1] +; X64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT:    movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; X64-NEXT:    movups %xmm1, (%r14) +; X64-NEXT:    movups %xmm0, (%rbx) +; X64-NEXT:    addq $56, %rsp +; X64-NEXT:    popq %rbx +; X64-NEXT:    popq %r14 +; X64-NEXT:    retq +; +; MACOS-SINCOS-STRET-LABEL: test_sincos_v2f64: +; MACOS-SINCOS-STRET:       ## %bb.0: +; MACOS-SINCOS-STRET-NEXT:    pushq %r14 +; MACOS-SINCOS-STRET-NEXT:    pushq %rbx +; MACOS-SINCOS-STRET-NEXT:    subq $56, %rsp +; MACOS-SINCOS-STRET-NEXT:    movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT:    movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT:    callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; MACOS-SINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-SINCOS-STRET-NEXT:    movups %xmm1, (%r14) +; MACOS-SINCOS-STRET-NEXT:    movups %xmm2, (%rbx) +; MACOS-SINCOS-STRET-NEXT:    addq $56, %rsp +; MACOS-SINCOS-STRET-NEXT:    popq %rbx +; MACOS-SINCOS-STRET-NEXT:    popq %r14 +; MACOS-SINCOS-STRET-NEXT:    retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v2f64: +; MACOS-NOSINCOS-STRET:       ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT:    pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT:    pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT:    subq $56, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT:    movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _cos +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    callq _cos +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    callq _sin +; MACOS-NOSINCOS-STRET-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    callq _sin +; MACOS-NOSINCOS-STRET-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-NOSINCOS-STRET-NEXT:    movups %xmm1, (%r14) +; MACOS-NOSINCOS-STRET-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT:    movups %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT:    addq $56, %rsp +; MACOS-NOSINCOS-STRET-NEXT:    popq %rbx +; MACOS-NOSINCOS-STRET-NEXT:    popq %r14 +; 
MACOS-NOSINCOS-STRET-NEXT:    retq    %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x)    %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0    %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 diff --git a/llvm/test/DebugInfo/debug-bool-const-value.ll b/llvm/test/DebugInfo/debug-bool-const-value.ll new file mode 100644 index 0000000..84cf993 --- /dev/null +++ b/llvm/test/DebugInfo/debug-bool-const-value.ll @@ -0,0 +1,29 @@ +; REQUIRES: object-emission +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +; CHECK: {{.*}}DW_TAG_variable +; CHECK-NEXT: {{.*}} DW_AT_const_value     (1) +; CHECK-NEXT: {{.*}} DW_AT_name    ("arg") + +define void @test() !dbg !5 +{ +entry: +  call void @"llvm.dbg.value"(metadata i1 true, metadata !7, metadata !8), !dbg !6 +  ret void, !dbg !6 +} + +declare void @"llvm.dbg.value"(metadata %".1", metadata %".2", metadata %".3") + +!llvm.dbg.cu = !{ !2 } +!llvm.module.flags = !{ !9, !10 } + +!1 = !DIFile(directory: "", filename: "test") +!2 = distinct !DICompileUnit(emissionKind: FullDebug, file: !1, isOptimized: false, language: DW_LANG_C_plus_plus, runtimeVersion: 0) +!3 = !DIBasicType(encoding: DW_ATE_boolean, name: "bool", size: 8) +!4 = !DISubroutineType(types: !{null}) +!5 = distinct !DISubprogram(file: !1, isDefinition: true, isLocal: false, isOptimized: false, line: 5, linkageName: "test", name: "test", scope: !1, scopeLine: 5, type: !4, unit: !2) +!6 = !DILocation(column: 1, line: 5, scope: !5) +!7 = !DILocalVariable(arg: 0, file: !1, line: 5, name: "arg", scope: !5, type: !3) +!8 = !DIExpression() +!9 = !{ i32 2, !"Dwarf Version", i32 4 } +!10 = !{ i32 2, !"Debug Info Version", i32 3 } diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt index f5cb4b7..2661ed5 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -82,12 +82,18 @@  #CHECK: lxvprll 6, 2, 1  0x7c 0xc2 0x0c 0xda +#CHECK: lxvpb32x 2, 15, 16 +0x7c,0x4f,0x86,0xda +  #CHECK: stxvprl 0, 1, 2  0x7c 0x01 0x15 0x9a  #CHECK: stxvprll 6, 0, 1  0x7c 0xc0 0x0d 0xda +#CHECK: stxvpb32x 2, 15, 16 +0x7c,0x4f,0x87,0xda +  #CHECK: dmxvi8gerx4 1, 2, 4  0xec,0x82,0x20,0x58 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt index f0df8ce..7fb8254 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -76,12 +76,18 @@  #CHECK: lxvprll 6, 2, 1  0xda 0x0c 0xc2 0x7c +#CHECK: lxvpb32x 2, 15, 16 +0xda,0x86,0x4f,0x7c +  #CHECK: stxvprl 0, 1, 2  0x9a 0x15 0x01 0x7c  #CHECK: stxvprll 6, 0, 1  0xda 0x0d 0xc0 0x7c +#CHECK: stxvpb32x 2, 15, 16 +0xda,0x87,0x4f,0x7c +  #CHECK: dmxvi8gerx4 1, 2, 4  0x58,0x20,0x82,0xec diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s index bc0683e..40059c4 100644 --- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -105,6 +105,10 @@  # CHECK-LE: lxvprll 6, 2, 1               # encoding: [0xda,0x0c,0xc2,0x7c]              lxvprll 6, 2, 1 +           lxvpb32x 2, 15, 16 +#CHECK-BE: lxvpb32x 2, 15, 16            # encoding: [0x7c,0x4f,0x86,0xda] +#CHECK-LE: lxvpb32x 2, 15, 16            # encoding: [0xda,0x86,0x4f,0x7c] +  # CHECK-BE: stxvprl 
0, 1, 2               # encoding: [0x7c,0x01,0x15,0x9a]  # CHECK-LE: stxvprl 0, 1, 2               # encoding: [0x9a,0x15,0x01,0x7c]              stxvprl 0, 1, 2 @@ -113,6 +117,10 @@  # CHECK-LE: stxvprll 6, 0, 1              # encoding: [0xda,0x0d,0xc0,0x7c]              stxvprll 6, 0, 1 +           stxvpb32x 2, 15, 16 +#CHECK-BE: stxvpb32x 2, 15, 16            # encoding: [0x7c,0x4f,0x87,0xda] +#CHECK-LE: stxvpb32x 2, 15, 16            # encoding: [0xda,0x87,0x4f,0x7c] +              dmxvi8gerx4 1, 2, 4  # CHECK-BE: dmxvi8gerx4 1, 2, 4                     # encoding: [0xec,0x82,0x20,0x58]  # CHECK-LE: dmxvi8gerx4 1, 2, 4                     # encoding: [0x58,0x20,0x82,0xec] diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 65b96c8..62975a3 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -208,6 +208,7 @@  ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis  ; CHECK-O-NEXT: Running pass: InstCombinePass  ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass +; CHECK-O23SZ-NEXT: Running pass: DFAJumpThreadingPass  ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass  ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis  ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 3a0fffe..012a1ab 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -133,6 +133,7 @@  ; CHECK-O-NEXT: Running pass: BDCEPass  ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis  ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running pass: DFAJumpThreadingPass  ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass  ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis  ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 4623edc..e021ff3 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -118,6 +118,7 @@  ; CHECK-O-NEXT: Running pass: BDCEPass  ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis  ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running pass: DFAJumpThreadingPass  ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass  ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis  ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 590afd9..20f94bc 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -127,6 +127,7 @@  ; CHECK-O-NEXT: Running pass: BDCEPass  ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis  ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running pass: DFAJumpThreadingPass  ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass  ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis  ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll index dd6acd2..b61edc8 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ 
b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -165,6 +165,7 @@  ; CHECK-O-NEXT: Running pass: BDCEPass  ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis  ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running pass: DFAJumpThreadingPass  ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass  ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis  ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index ee05452..acf8c05 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -167,6 +167,7 @@  ; CHECK-O-NEXT: Running pass: BDCEPass  ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis  ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running pass: DFAJumpThreadingPass  ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass  ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis  ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index fd95e94..6b3c5ca 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -131,6 +131,7 @@  ; CHECK-O-NEXT: Running pass: BDCEPass  ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis  ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running pass: DFAJumpThreadingPass  ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass  ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis  ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 649e946..fffe50f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -9,15 +9,25 @@ target triple = "x86_64-unknown-linux-gnu"  ; This should promote  define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    
[[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0:![0-9]+]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -66,15 +76,25 @@ bb:  ; This should promote  define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -123,15 +143,25 @@ bb:  ; This should promote  define internal fastcc void 
@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -180,15 +210,25 @@ bb:  ; This should promote  define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) 
#[[ATTR0]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -237,13 +277,21 @@ bb:  ; This should not promote  define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -290,13 +338,21 @@ bb:  ; This should not promote  define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse 
nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -343,15 +399,25 @@ bb:  ; This should promote  define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function 
Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -400,15 +466,25 @@ bb:  ; This should promote  define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {  ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { -; CHECK-NEXT:  bb: -; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT:    ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT:  bb: +; TUNIT-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT:    ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { +; CGSCC-NEXT:  bb: +; CGSCC-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT:    store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT:    ret void  ;  bb:    %tmp = load <8 x i64>, ptr %arg1 @@ -464,6 +540,14 @@ attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2  attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }  attributes #5 = { argmemonly nounwind }  ;. 
+; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) } +; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn } +;.  ; TUNIT: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }  ; TUNIT: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }  ; TUNIT: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } @@ -472,11 +556,7 @@ attributes #5 = { argmemonly nounwind }  ; TUNIT: attributes #[[ATTR5]] = { nofree willreturn memory(write) }  ; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn }  ;. -; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } -; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) } -; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn } +; CGSCC: [[META0]] = !{}  ;. +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Transforms/Attributor/align-ptrmask.ll b/llvm/test/Transforms/Attributor/align-ptrmask.ll new file mode 100644 index 0000000..008f5e1 --- /dev/null +++ b/llvm/test/Transforms/Attributor/align-ptrmask.ll @@ -0,0 +1,206 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=attributor -S < %s | FileCheck %s + +define ptr @align_ptrmask_back_no_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_no_prop( +; CHECK-SAME: ptr nofree writeonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT:    [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT:    store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT:    ret ptr [[P]] +; +  %sel = select i1 %cmp1, i64 -32, i64 -8 +  %sel1 = select i1 %cmp2, i64 %sel, i64 -16 +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) +  store float 1.0, ptr %p, align 8 +  ret ptr %p +} + +define ptr @align_ptrmask_back_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 16 dereferenceable(4) ptr @align_ptrmask_back_prop( +; CHECK-SAME: ptr nofree writeonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT:    [[P:%.*]] = tail call noundef nonnull align 16 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT:    store float 1.000000e+00, ptr [[P]], align 16 +; CHECK-NEXT:    ret ptr [[P]] +; +  %sel = select i1 %cmp1, i64 -32, i64 -8 +  %sel1 = select i1 %cmp2, i64 %sel, i64 -16 +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) +  store float 1.0, ptr %p, align 16 +  ret ptr %p +} + +define ptr @align_ptrmask_forward_mask(ptr align 2 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 8 ptr @align_ptrmask_forward_mask( +; CHECK-SAME: ptr nofree readnone align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT:    [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %sel = select i1 %cmp1, i64 -32, i64 -8 +  %sel1 = select i1 %cmp2, i64 %sel, i64 -16 +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) +  ret ptr %p +} + +define ptr @align_ptrmask_forward_ptr(ptr align 16 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 16 ptr @align_ptrmask_forward_ptr( +; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT:    [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %sel = select i1 %cmp1, i64 -32, i64 -8 +  %sel1 = select i1 %cmp2, i64 %sel, i64 
-16 +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) +  ret ptr %p +} + +define ptr @align_ptrmask_forward_nonconst_mask(ptr align 8 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 8 ptr @align_ptrmask_forward_nonconst_mask( +; CHECK-SAME: ptr nofree readnone align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]] +; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT:    [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %sel = select i1 %cmp1, i64 -32, i64 %y +  %sel1 = select i1 %cmp2, i64 %sel, i64 -16 +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) +  ret ptr %p +} + +define ptr @align_ptrmask_back_nonconst_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_nonconst_mask( +; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]] +; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT:    [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT:    store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT:    ret ptr [[P]] +; +  %sel = select i1 %cmp1, i64 -32, i64 %y +  %sel1 = select i1 %cmp2, i64 %sel, i64 -16 +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) +  store float 1.0, ptr %p, align 8 +  ret ptr %p +} + +define ptr @align_ptrmask_back_const_back_noprop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_noprop( +; CHECK-SAME: ptr nofree writeonly align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT:    [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]] +; CHECK-NEXT:    store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8) +  store float 1.0, ptr %p, align 8 +  ret ptr %p +} + +define ptr @align_ptrmask_back_const_back_prop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_prop( +; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT:    [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -2) #[[ATTR4]] +; CHECK-NEXT:    store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -2) +  store float 1.0, ptr %p, align 8 +  ret ptr %p +} + +define ptr @align_ptrmask_back_const_forward_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 8 ptr @align_ptrmask_back_const_forward_mask( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr 
@llvm.ptrmask.p0.i64(ptr %x, i64 -8) +  ret ptr %p +} + +define ptr @align_ptrmask_back_const_forward_ptr(ptr align 16 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 16 ptr @align_ptrmask_back_const_forward_ptr( +; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8) +  ret ptr %p +} + +; FIXME: The store will create AAAlign for %ptr1, +; but the attribute didn't propagate through extractelement, need propagate +define <2 x ptr> @ptrmask_v2p0_v2i64(<2 x ptr> align 2 %ptr, i64 %a) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_v2p0_v2i64( +; CHECK-SAME: <2 x ptr> align 2 [[PTR:%.*]], i64 [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT:    [[RESULT:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PTR]], <2 x i64> noundef splat (i64 -8)) #[[ATTR4]] +; CHECK-NEXT:    [[PTR1:%.*]] = extractelement <2 x ptr> [[RESULT]], i32 0 +; CHECK-NEXT:    [[PTR2:%.*]] = extractelement <2 x ptr> [[RESULT]], i32 1 +; CHECK-NEXT:    store i64 [[A]], ptr [[PTR1]], align 16 +; CHECK-NEXT:    store i64 [[A]], ptr [[PTR2]], align 16 +; CHECK-NEXT:    ret <2 x ptr> [[RESULT]] +; +  %result = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> splat(i64 -8)) +  %ptr1 = extractelement <2 x ptr> %result, i32 0 +  %ptr2 = extractelement <2 x ptr> %result, i32 1 +  store i64 %a, ptr %ptr1, align 16 +  store i64 %a, ptr %ptr2, align 16 +  ret <2 x ptr> %result +} + +define ptr @align_ptrmask_forward_mask_positive(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4 ptr @align_ptrmask_forward_mask_positive( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[P:%.*]] = tail call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef 2) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 2) +  ret ptr %p +} + +define ptr @align_ptrmask_forward_mask_poison(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4 ptr @align_ptrmask_forward_mask_poison( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[P:%.*]] = tail call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 poison) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 poison) +  ret ptr %p +} + +define ptr @align_ptrmask_forward_mask_max(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4294967296 ptr @align_ptrmask_forward_mask_max( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[P:%.*]] = tail call align 4294967296 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -4294967296) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -4294967296) +  ret ptr %p +} + +define ptr @align_ptrmask_forward_mask_max_plus_one(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4294967296 ptr @align_ptrmask_forward_mask_max_plus_one( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[P:%.*]] = tail call align 
4294967296 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8589934592) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8589934592) +  ret ptr %p +} + +define ptr @align_ptrmask_back_callsite(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 16 ptr @align_ptrmask_back_callsite( +; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT:    [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -4) #[[ATTR4]] +; CHECK-NEXT:    ret ptr [[P]] +; +  %p = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -4) +  ret ptr %p +} diff --git a/llvm/test/Transforms/DropUnnecessaryAssumes/basic.ll b/llvm/test/Transforms/DropUnnecessaryAssumes/basic.ll index 8a6f60b..87aed77 100644 --- a/llvm/test/Transforms/DropUnnecessaryAssumes/basic.ll +++ b/llvm/test/Transforms/DropUnnecessaryAssumes/basic.ll @@ -184,6 +184,18 @@ define void @type_test(ptr %x) {    ret void  } +define void @public_type_test(ptr %x) { +; CHECK-LABEL: define void @public_type_test( +; CHECK-SAME: ptr [[X:%.*]]) { +; CHECK-NEXT:    [[TEST:%.*]] = call i1 @llvm.public.type.test(ptr [[X]], metadata !"typeid") +; CHECK-NEXT:    call void @llvm.assume(i1 [[TEST]]) +; CHECK-NEXT:    ret void +; +  %test = call i1 @llvm.public.type.test(ptr %x, metadata !"typeid") +  call void @llvm.assume(i1 %test) +  ret void +} +  define void @multiple_dead_conds(i32 %x) {  ; CHECK-LABEL: define void @multiple_dead_conds(  ; CHECK-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll index 14ee00d..2763860 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll @@ -114,7 +114,7 @@ define i32 @urem_order1(i32 %n) {  ; CHECK:       [[LOOP]]:  ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]  ; CHECK-NEXT:    call void @foo() -; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 3 +; CHECK-NEXT:    [[IV_NEXT]] = add nuw i32 [[IV]], 3  ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]  ; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]]  ; CHECK:       [[EXIT_LOOPEXIT]]: @@ -205,13 +205,12 @@ define i64 @test_loop_with_div_order_1(i64 %n) {  ; CHECK-NEXT:    [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0  ; CHECK-NEXT:    br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]  ; CHECK:       [[LOOP_PREHEADER]]: -; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[UPPER_BOUND]], i64 1)  ; CHECK-NEXT:    br label %[[LOOP:.*]]  ; CHECK:       [[LOOP]]:  ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]  ; CHECK-NEXT:    [[DUMMY:%.*]] = load volatile i64, ptr null, align 8  ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]] +; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UPPER_BOUND]]  ; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]]  ; CHECK:       [[EXIT_LOOPEXIT]]:  ; CHECK-NEXT:    br label %[[EXIT]] diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 6b090e9..f61a197 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -2113,3 +2113,98 @@ define <4 
x i32> @or_zext_nneg_minus_constant_splat(<4 x i8> %a) {    %or = or <4 x i32> %zext, splat (i32 -9)    ret <4 x i32> %or  } + +define i8 @or_positive_minus_non_positive_to_abs(i8 %a){ +; CHECK-LABEL: @or_positive_minus_non_positive_to_abs( +; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.abs.i8(i8 [[A:%.*]], i1 false) +; CHECK-NEXT:    ret i8 [[TMP2]] +; +  %b = icmp sgt i8 %a, 0 +  %mask = sext i1 %b to i8 +  %neg = sub i8 0, %a +  %mask_inv = xor i8 %mask, -1 +  %c = and i8 %neg, %mask_inv +  %d = and i8 %a, %mask +  %or = or i8 %c, %d +  ret i8 %or +} + +; TODO: Fold to smax https://alive2.llvm.org/ce/z/wDiDh2 +define i8 @or_select_smax_neg_to_abs(i8 %a){ +; CHECK-LABEL: @or_select_smax_neg_to_abs( +; CHECK-NEXT:    [[SGT0:%.*]] = icmp sgt i8 [[A:%.*]], 0 +; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i8 0, [[A]] +; CHECK-NEXT:    [[OR:%.*]] = select i1 [[SGT0]], i8 0, i8 [[NEG]] +; CHECK-NEXT:    ret i8 [[OR]] +; +  %sgt0 = icmp sgt i8 %a, 0 +  %neg = sub nsw i8 0, %a +  %sel = select i1 %sgt0, i8 0, i8 %neg +  ret i8 %sel +} + +; TODO: Fold to abs https://alive2.llvm.org/ce/z/DybfHG +define i8 @or_select_smax_smax_to_abs(i8 %a){ +; CHECK-LABEL: @or_select_smax_smax_to_abs( +; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i8 0, [[A:%.*]] +; CHECK-NEXT:    [[SEL:%.*]] = call i8 @llvm.smax.i8(i8 [[NEG]], i8 0) +; CHECK-NEXT:    [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 0) +; CHECK-NEXT:    [[OR:%.*]] = or i8 [[SEL]], [[MAX]] +; CHECK-NEXT:    ret i8 [[OR]] +; +  %neg = sub nsw i8 0, %a +  %sel = call i8 @llvm.smax.i8(i8 %neg, i8 0) +  %max = call i8 @llvm.smax.i8(i8 %a, i8 0) +  %or = or i8 %sel, %max +  ret i8 %or +} + +declare i8 @llvm.abs.i8(i8, i1) +declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1) + +define <2 x i8> @or_sgt_select_smax_to_abs(<2 x i8> %a){ +; CHECK-LABEL: @or_sgt_select_smax_to_abs( +; CHECK-NEXT:    [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false) +; CHECK-NEXT:    ret <2 x i8> [[OR]] +; +  %sgt0 = icmp sgt <2 x i8> %a, zeroinitializer +  %neg = sub <2 x i8> zeroinitializer, %a +  %sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg +  %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer) +  %or = or <2 x i8> %sel, %max +  ret <2 x i8> %or +} + +define <2 x i8> @or_slt_select_smax_to_abs(<2 x i8> %a){ +; CHECK-LABEL: @or_slt_select_smax_to_abs( +; CHECK-NEXT:    [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false) +; CHECK-NEXT:    ret <2 x i8> [[OR]] +; +  %slt0 = icmp slt <2 x i8> %a, zeroinitializer +  %neg = sub <2 x i8> zeroinitializer, %a +  %sel = select <2 x i1> %slt0, <2 x i8> %neg, <2 x i8> zeroinitializer +  %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer) +  %or = or <2 x i8> %sel, %max +  ret <2 x i8> %or +} + +; negative test - %d has multiple uses. %or is not folded to abs. 
+ +define <2 x i8> @or_select_smax_multi_uses(<2 x i8> %a){ +; CHECK-LABEL: @or_select_smax_multi_uses( +; CHECK-NEXT:    [[B:%.*]] = icmp sgt <2 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[A]] +; CHECK-NEXT:    [[C:%.*]] = select <2 x i1> [[B]], <2 x i8> zeroinitializer, <2 x i8> [[NEG]] +; CHECK-NEXT:    [[D:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[A]], <2 x i8> zeroinitializer) +; CHECK-NEXT:    [[OR1:%.*]] = or <2 x i8> [[C]], [[D]] +; CHECK-NEXT:    [[OR:%.*]] = add <2 x i8> [[OR1]], [[D]] +; CHECK-NEXT:    ret <2 x i8> [[OR]] +; +  %sgt0 = icmp sgt <2 x i8> %a, zeroinitializer +  %neg = sub <2 x i8> zeroinitializer, %a +  %sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg +  %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer) +  %or = or <2 x i8> %sel, %max +  %add = add <2 x i8> %or, %max +  ret <2 x i8> %add +} diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index 3d97048..8b3c050 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -256,27 +256,27 @@ define <2 x i1> @not_logical_or2(i1 %b, <2 x i32> %a) {    ret <2 x i1> %and  } -define i1 @bools_logical_commute0(i1 %a, i1 %b, i1 %c) { +define i1 @bools_logical_commute0(i1 %a, i1 %b, i1 %c) !prof !0 {  ; CHECK-LABEL: @bools_logical_commute0( -; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]], !prof [[PROF2]]  ; CHECK-NEXT:    ret i1 [[OR]]  ;    %not = xor i1 %c, -1 -  %and1 = select i1 %not, i1 %a, i1 false -  %and2 = select i1 %c, i1 %b, i1 false -  %or = select i1 %and1, i1 true, i1 %and2 +  %and1 = select i1 %not, i1 %a, i1 false, !prof!1 +  %and2 = select i1 %c, i1 %b, i1 false, !prof !2 +  %or = select i1 %and1, i1 true, i1 %and2, !prof !3    ret i1 %or  } -define i1 @bools_logical_commute0_and1(i1 %a, i1 %b, i1 %c) { +define i1 @bools_logical_commute0_and1(i1 %a, i1 %b, i1 %c) !prof !0 {  ; CHECK-LABEL: @bools_logical_commute0_and1( -; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]], !prof [[PROF1]]  ; CHECK-NEXT:    ret i1 [[OR]]  ;    %not = xor i1 %c, -1    %and1 = and i1 %not, %a -  %and2 = select i1 %c, i1 %b, i1 false -  %or = select i1 %and1, i1 true, i1 %and2 +  %and2 = select i1 %c, i1 %b, i1 false, !prof !1 +  %or = select i1 %and1, i1 true, i1 %and2, !prof !2    ret i1 %or  } @@ -292,15 +292,15 @@ define i1 @bools_logical_commute0_and2(i1 %a, i1 %b, i1 %c) {    ret i1 %or  } -define i1 @bools_logical_commute0_and1_and2(i1 %a, i1 %b, i1 %c) { +define i1 @bools_logical_commute0_and1_and2(i1 %a, i1 %b, i1 %c) !prof !0 {  ; CHECK-LABEL: @bools_logical_commute0_and1_and2( -; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]], !prof [[PROF3:![0-9]+]]  ; CHECK-NEXT:    ret i1 [[OR]]  ;    %not = xor i1 %c, -1    %and1 = and i1 %not, %a    %and2 = and i1 %c, %b -  %or = select i1 %and1, i1 true, i1 %and2 +  %or = select i1 %and1, i1 true, i1 %and2, !prof !1    ret i1 %or  } @@ -457,27 +457,27 @@ define i1 @bools_logical_commute3_and1_and2(i1 %b, i1 %c) {    ret i1 %or  } -define i1 @bools2_logical_commute0(i1 %a, i1 %b, i1 %c) { +define 
i1 @bools2_logical_commute0(i1 %a, i1 %b, i1 %c) !prof !0 {  ; CHECK-LABEL: @bools2_logical_commute0( -; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[A:%.*]], i1 [[B:%.*]] +; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[A:%.*]], i1 [[B:%.*]], !prof [[PROF1]]  ; CHECK-NEXT:    ret i1 [[OR]]  ;    %not = xor i1 %c, -1 -  %and1 = select i1 %c, i1 %a, i1 false -  %and2 = select i1 %not, i1 %b, i1 false -  %or = select i1 %and1, i1 true, i1 %and2 +  %and1 = select i1 %c, i1 %a, i1 false, !prof !1 +  %and2 = select i1 %not, i1 %b, i1 false, !prof !2 +  %or = select i1 %and1, i1 true, i1 %and2, !prof !3    ret i1 %or  } -define i1 @bools2_logical_commute0_and1(i1 %a, i1 %b, i1 %c) { +define i1 @bools2_logical_commute0_and1(i1 %a, i1 %b, i1 %c) !prof !0 {  ; CHECK-LABEL: @bools2_logical_commute0_and1( -; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[A:%.*]], i1 [[B:%.*]] +; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[A:%.*]], i1 [[B:%.*]], !prof [[PROF2]]  ; CHECK-NEXT:    ret i1 [[OR]]  ;    %not = xor i1 %c, -1    %and1 = and i1 %c, %a -  %and2 = select i1 %not, i1 %b, i1 false -  %or = select i1 %and1, i1 true, i1 %and2 +  %and2 = select i1 %not, i1 %b, i1 false, !prof !1 +  %or = select i1 %and1, i1 true, i1 %and2, !prof !2    ret i1 %or  } @@ -493,15 +493,15 @@ define i1 @bools2_logical_commute0_and2(i1 %a, i1 %b, i1 %c) {    ret i1 %or  } -define i1 @bools2_logical_commute0_and1_and2(i1 %a, i1 %b, i1 %c) { +define i1 @bools2_logical_commute0_and1_and2(i1 %a, i1 %b, i1 %c) !prof !0 {  ; CHECK-LABEL: @bools2_logical_commute0_and1_and2( -; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[A:%.*]], i1 [[B:%.*]] +; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[A:%.*]], i1 [[B:%.*]], !prof [[PROF3]]  ; CHECK-NEXT:    ret i1 [[OR]]  ;    %not = xor i1 %c, -1    %and1 = and i1 %c, %a    %and2 = and i1 %not, %b -  %or = select i1 %and1, i1 true, i1 %and2 +  %or = select i1 %and1, i1 true, i1 %and2, !prof !1    ret i1 %or  } @@ -799,8 +799,11 @@ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) {  !0 = !{!"function_entry_count", i64 1000}  !1 = !{!"branch_weights", i32 2, i32 3} +!2 = !{!"branch_weights", i32 5, i32 7} +!3 = !{!"branch_weights", i32 11, i32 13}  ;.  ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}  ; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}  ; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 2} +; CHECK: [[PROF3]] = !{!"unknown", !"instcombine"}  ;. diff --git a/llvm/test/Transforms/LoopUnroll/zeroed-branch-weights.ll b/llvm/test/Transforms/LoopUnroll/zeroed-branch-weights.ll new file mode 100644 index 0000000..4d378b0 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/zeroed-branch-weights.ll @@ -0,0 +1,30 @@ +; Check that zeroed branch weights do not crash or otherwise break basic +; LoopUnroll behavior when it tries to compute a probability from them. 
+ +; RUN: opt < %s -S -unroll-count=2 -passes='loop-unroll' 2>&1 | FileCheck %s + +define void @test() { +entry: +  br label %loop + +loop: +  br i1 false, label %end, label %loop, !prof !0 + +end: +  ret void +} + +!0 = !{!"branch_weights", i32 0, i32 0} + +; CHECK: define void @test() { +; CHECK: entry: +; CHECK:   br label %loop +; CHECK: loop: +; CHECK:   br i1 false, label %end, label %loop.1, !prof !0 +; CHECK: loop.1: +; CHECK:   br i1 false, label %end, label %loop, !prof !0, !llvm.loop !1 +; CHECK-NOT: loop.2 +; CHECK: end: +; CHECK:   ret void +; CHECK: } +; CHECK: !0 = !{!"branch_weights", i32 0, i32 0} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index bfee39ea..068f82c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -365,8 +365,8 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 {  ; CHECK:       [[VECTOR_BODY]]:  ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]  ; CHECK-NEXT:    [[TMP6:%.*]] = alloca i8, i64 0, align 16 -; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 -; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1 +; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0 +; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer  ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]]  ; CHECK-NEXT:    store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8  ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll index ea01489..0a9494e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll @@ -10,8 +10,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {  ; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]  ; CHECK:       [[VECTOR_PH]]:  ; CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00) -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0 -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0 +; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer  ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]  ; CHECK:       [[VECTOR_BODY]]:  ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 157b787..3558957 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -64,9 +64,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {  ; TFCOMMON-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]  ; 
TFCOMMON-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]  ; TFCOMMON-NEXT:    [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFCOMMON-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]] -; TFCOMMON-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; TFCOMMON-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1 +; TFCOMMON-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]] +; TFCOMMON-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFCOMMON-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer  ; TFCOMMON-NEXT:    [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer  ; TFCOMMON-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)  ; TFCOMMON-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 @@ -79,7 +79,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {  ; TFCOMMON-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1  ; TFCOMMON-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]]  ; TFCOMMON:       pred.store.if1: -; TFCOMMON-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1 +; TFCOMMON-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0  ; TFCOMMON-NEXT:    store double [[TMP19]], ptr [[P]], align 8  ; TFCOMMON-NEXT:    br label [[PRED_STORE_CONTINUE6]]  ; TFCOMMON:       pred.store.continue2: @@ -105,9 +105,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {  ; TFA_INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]  ; TFA_INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]  ; TFA_INTERLEAVE-NEXT:    [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFA_INTERLEAVE-NEXT:    [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] -; TFA_INTERLEAVE-NEXT:    [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 -; TFA_INTERLEAVE-NEXT:    [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1 +; TFA_INTERLEAVE-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]] +; TFA_INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFA_INTERLEAVE-NEXT:    [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer  ; TFA_INTERLEAVE-NEXT:    [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer  ; TFA_INTERLEAVE-NEXT:    [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)  ; TFA_INTERLEAVE-NEXT:    [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 @@ -120,7 +120,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {  ; TFA_INTERLEAVE-NEXT:    [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1  ; 
TFA_INTERLEAVE-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]  ; TFA_INTERLEAVE:       pred.store.if3: -; TFA_INTERLEAVE-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0  ; TFA_INTERLEAVE-NEXT:    store double [[TMP22]], ptr [[P]], align 8  ; TFA_INTERLEAVE-NEXT:    br label [[PRED_STORE_CONTINUE5]]  ; TFA_INTERLEAVE:       pred.store.continue4: @@ -134,7 +134,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {  ; TFA_INTERLEAVE-NEXT:    [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1  ; TFA_INTERLEAVE-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]  ; TFA_INTERLEAVE:       pred.store.if7: -; TFA_INTERLEAVE-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0  ; TFA_INTERLEAVE-NEXT:    store double [[TMP34]], ptr [[P]], align 8  ; TFA_INTERLEAVE-NEXT:    br label [[PRED_STORE_CONTINUE9]]  ; TFA_INTERLEAVE:       pred.store.continue8: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 49f663f..62e248b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -1,12 +1,12 @@  ; REQUIRES: asserts -; RUN: opt -mattr=+neon,+dotprod -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output %s 2>&1 | FileCheck %s  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"  target triple = "aarch64-none-unknown-elf"  ; Tests for printing VPlans that are enabled under AArch64 -define i32 @print_partial_reduction(ptr %a, ptr %b) { +define i32 @print_partial_reduction(ptr %a, ptr %b) "target-features"="+neon,+dotprod" {  ; CHECK:      VPlan 'Initial VPlan for VF={8,16},UF>=1' {  ; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF  ; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF @@ -69,60 +69,37 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {  ; CHECK-NEXT: No successors  ; CHECK-NEXT: }  ; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' { +; CHECK-NEXT: Live-in ir<1024> = vector-trip-count  ; CHECK-NEXT: Live-in ir<1024> = original trip-count  ; CHECK-EMPTY:  ; CHECK-NEXT: ir-bb<entry>: -; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.main.loop.iter.check> +; CHECK-NEXT: Successor(s): vector.ph  ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<vector.main.loop.iter.check>: -; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<vector.ph>: -; CHECK-NEXT:  EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4> +; CHECK-NEXT: vector.ph: +; CHECK-NEXT:   EMIT vp<%1> = reduction-start-vector ir<0>, ir<0>, ir<4>  ; CHECK-NEXT: Successor(s): vector.body  ; CHECK-EMPTY:  ; CHECK-NEXT: vector.body: -; CHECK-NEXT:   EMIT-SCALAR vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT:   WIDEN-REDUCTION-PHI ir<%accum> = phi vp<[[RDX_START]]>, ir<%add> (VF scaled by 1/4) -; CHECK-NEXT:   CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]> +; CHECK-NEXT:   EMIT-SCALAR 
vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] +; CHECK-NEXT:   WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%1>, ir<%add> (VF scaled by 1/4) +; CHECK-NEXT:   CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>  ; CHECK-NEXT:   WIDEN ir<%load.a> = load ir<%gep.a> -; CHECK-NEXT:   CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[EP_IV]]> +; CHECK-NEXT:   CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>  ; CHECK-NEXT:   WIDEN ir<%load.b> = load ir<%gep.b>  ; CHECK-NEXT:   WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32  ; CHECK-NEXT:   WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32  ; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>  ; CHECK-NEXT:   PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul> -; CHECK-NEXT:   EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16> -; CHECK-NEXT:   EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024> +; CHECK-NEXT:   EMIT vp<%index.next> = add nuw vp<%index>, ir<16> +; CHECK-NEXT:   EMIT branch-on-count vp<%index.next>, ir<1024>  ; CHECK-NEXT: Successor(s): middle.block, vector.body  ; CHECK-EMPTY:  ; CHECK-NEXT: middle.block: -; CHECK-NEXT:   EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add> -; CHECK-NEXT:   EMIT branch-on-cond ir<true> -; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> +; CHECK-NEXT:   EMIT vp<%3> = compute-reduction-result ir<%accum>, ir<%add> +; CHECK-NEXT: Successor(s): ir-bb<exit>  ; CHECK-EMPTY:  ; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT:   IR   %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RED_RESULT]]> from middle.block) -; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<scalar.ph>: -; CHECK-NEXT:   EMIT-SCALAR vp<[[EP_RESUME:%.+]]> = phi [ ir<1024>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT:   EMIT-SCALAR vp<[[EP_MERGE:%.+]]> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT:   EMIT-SCALAR vp<%6> = resume-for-epilogue vp<%vec.epilog.resume.val> -; CHECK-NEXT: Successor(s): ir-bb<for.body> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<for.body>: -; CHECK-NEXT:   IR   %accum = phi i32 [ 0, %scalar.ph ], [ %add, %for.body ] (extra operand: vp<[[EP_MERGE]]> from ir-bb<scalar.ph>) -; CHECK-NEXT:   IR   %gep.a = getelementptr i8, ptr %a, i64 %iv -; CHECK-NEXT:   IR   %load.a = load i8, ptr %gep.a, align 1 -; CHECK-NEXT:   IR   %ext.a = zext i8 %load.a to i32 -; CHECK-NEXT:   IR   %gep.b = getelementptr i8, ptr %b, i64 %iv -; CHECK-NEXT:   IR   %load.b = load i8, ptr %gep.b, align 1 -; CHECK-NEXT:   IR   %ext.b = zext i8 %load.b to i32 -; CHECK-NEXT:   IR   %mul = mul i32 %ext.b, %ext.a -; CHECK-NEXT:   IR   %add = add i32 %mul, %accum -; CHECK-NEXT:   IR   %iv.next = add i64 %iv, 1 -; CHECK-NEXT:   IR   %exitcond.not = icmp eq i64 %iv.next, 1024 +; CHECK-NEXT:   IR   %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<%3> from middle.block)  ; CHECK-NEXT: No successors  ; CHECK-NEXT: }  entry: @@ -141,8 +118,12 @@ for.body:                                         ; preds = %for.body, %entry    %add = add i32 %mul, %accum    %iv.next = add i64 %iv, 1    %exitcond.not = icmp eq i64 %iv.next, 1024 -  br i1 %exitcond.not, label %exit, label %for.body +  br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !0  exit:    ret i32 %add  } + +!0 = distinct !{!0, !2, !3} +!2 = !{!"llvm.loop.interleave.count", i32 1} +!3 = !{!"llvm.loop.vectorize.predicate.enable", i1 false} diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll 
b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll index 03087bb..4590dfc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll @@ -199,10 +199,8 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) {  ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 7  ; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[A]], align 4  ; CHECK-NEXT:    [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00 -; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0 -; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1 -; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2 -; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3 +; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0 +; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer  ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]]  ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]  ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 9deab90..fe230fa 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -102,7 +102,7 @@ exit:    ret void  } -define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { +define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr, i32 %z) optsize {  ; CHECK-LABEL: sink_replicate_region_2  ; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {  ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF @@ -125,16 +125,18 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {  ; CHECK-NEXT:   ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>  ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>  ; CHECK-NEXT:   EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> +; CHECK-NEXT:   WIDEN ir<%cond> = icmp eq ir<%iv>, ir<%z> +; CHECK-NEXT:   EMIT vp<[[AND:%.+]]> = logical-and vp<[[MASK]]>, ir<%cond>  ; CHECK-NEXT:   Successor(s): pred.store  ; CHECK-EMPTY:  ; CHECK-NEXT: <xVFxUF> pred.store: {  ; CHECK-NEXT:  pred.store.entry: -; CHECK-NEXT:    BRANCH-ON-MASK vp<[[MASK]]> +; CHECK-NEXT:    BRANCH-ON-MASK vp<[[AND]]>  ; CHECK-NEXT:  Successor(s): pred.store.if, pred.store.continue  ; CHECK-EMPTY:  ; CHECK-NEXT:  pred.store.if: -; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>  ; CHECK-NEXT:     REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>  ; CHECK-NEXT:     REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>  ; CHECK-NEXT:     REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>  ; CHECK-NEXT:     REPLICATE store ir<%add>, ir<%gep> @@ -143,9 +145,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {  ; CHECK-NEXT:   pred.store.continue:  ; CHECK-NEXT:   No successors  ; CHECK-NEXT: } -; CHECK-NEXT: 
Successor(s): loop.0 +; CHECK-NEXT: Successor(s): if.1  ; CHECK-EMPTY: -; CHECK-NEXT: loop.0: +; CHECK-NEXT: if.1:  ; CHECK-NEXT:   EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>  ; CHECK-NEXT:   EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>  ; CHECK-NEXT: No successors @@ -162,13 +164,20 @@ entry:    br label %loop  loop: -  %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] -  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] -  %rem = srem i32 %recur, %x +  %recur = phi i32 [ 0, %entry ], [ %recur.next, %latch ] +  %iv = phi i32 [ 0, %entry ], [ %iv.next, %latch ]    %recur.next = sext i8 %y to i32 +  %cond = icmp eq i32 %iv, %z +  br i1 %cond, label %if, label %latch + +if: +  %rem = srem i32 %recur, %x    %add = add i32 %rem, %recur.next    %gep = getelementptr i32, ptr %ptr, i32 %iv    store i32 %add, ptr %gep +  br label %latch + +latch:    %iv.next = add nsw i32 %iv, 1    %ec = icmp eq i32 %iv.next, 20001    br i1 %ec, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll new file mode 100644 index 0000000..8615401 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s + +define void @hoist_invariant_load_noalias_due_to_memchecks(ptr %dst, ptr %invariant_ptr, i32 %n) { +; CHECK-LABEL: define void @hoist_invariant_load_noalias_due_to_memchecks( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT:  [[ENTRY:.*]]: +; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK:       [[VECTOR_MEMCHECK]]: +; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4 +; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4 +; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]] +; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK:       [[VECTOR_PH]]: +; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]] +; CHECK:       [[VECTOR_BODY]]: +; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: 
   store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK:       [[MIDDLE_BLOCK]]: +; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK:       [[SCALAR_PH]]: +; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT:    br label %[[LOOP:.*]] +; CHECK:       [[LOOP]]: +; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT:    [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4 +; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT:    store i32 [[INV_VAL]], ptr [[GEP]], align 4 +; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void +; +entry: +  br label %loop + +loop: +  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +  %inv_val = load i32, ptr %invariant_ptr, align 4 +  %gep = getelementptr inbounds i32, ptr %dst, i32 %iv +  store i32 %inv_val, ptr %gep, align 4 +  %iv.next = add nuw nsw i32 %iv, 1 +  %ec = icmp eq i32 %iv.next, %n +  br i1 %ec, label %exit, label %loop + +exit: +  ret void +} + +; Test that loads with non-invariant addresses are not hoisted. 
+define void @dont_hoist_variant_address(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @dont_hoist_variant_address( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT:  [[ENTRY:.*]]: +; CHECK-NEXT:    [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT:    [[A1:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK:       [[VECTOR_MEMCHECK]]: +; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[SRC2]] +; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK:       [[VECTOR_PH]]: +; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]] +; CHECK:       [[VECTOR_BODY]]: +; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] +; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT:    store <4 x i32> [[WIDE_LOAD]], ptr [[TMP2]], align 4 +; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK:       [[MIDDLE_BLOCK]]: +; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK:       [[SCALAR_PH]]: +; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT:    br label %[[LOOP:.*]] +; CHECK:       [[LOOP]]: +; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT:    store i32 [[VAL]], ptr [[GEP_DST]], align 4 +; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void +; +entry: +  br label %loop + +loop: +  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +  %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv +  %val = load i32, ptr %gep.src, align 4 +  %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv +  store i32 %val, ptr %gep.dst, align 4 +  %iv.next = add nuw nsw i32 %iv, 1 +  %ec = icmp eq i32 %iv.next, %n +  br i1 %ec, label %exit, label %loop + +exit: +  ret void +} + +; Test that predicated loads are not hoisted. 
+define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_ptr, i32 %n) { +; CHECK-LABEL: define void @dont_hoist_predicated_load( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], ptr [[COND_PTR:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT:  [[ENTRY:.*]]: +; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK:       [[VECTOR_MEMCHECK]]: +; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 +; CHECK-NEXT:    [[TMP22:%.*]] = shl nuw nsw i64 [[TMP20]], 2 +; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP22]], 4 +; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND_PTR]], i64 [[TMP3]] +; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4 +; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[COND_PTR]], [[SCEVGEP]] +; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT:    [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] +; CHECK-NEXT:    [[BOUND14:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]] +; CHECK-NEXT:    [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] +; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] +; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK:       [[VECTOR_PH]]: +; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]] +; CHECK:       [[VECTOR_BODY]]: +; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] +; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[INDEX]] +; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META11:![0-9]+]] +; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK:       [[PRED_STORE_IF]]: +; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]] +; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]] +; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]] +; CHECK:       [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK:       [[PRED_STORE_IF6]]: +; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]] +; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP8]] +; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP13]], align 4, !alias.scope [[META16]], !noalias [[META18]] +; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE7]] +; CHECK:       [[PRED_STORE_CONTINUE7]]: +; 
CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] +; CHECK:       [[PRED_STORE_IF8]]: +; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]] +; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP12]] +; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP17]], align 4, !alias.scope [[META16]], !noalias [[META18]] +; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE9]] +; CHECK:       [[PRED_STORE_CONTINUE9]]: +; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] +; CHECK:       [[PRED_STORE_IF10]]: +; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]] +; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]] +; CHECK-NEXT:    store i32 [[TMP19]], ptr [[TMP21]], align 4, !alias.scope [[META16]], !noalias [[META18]] +; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE11]] +; CHECK:       [[PRED_STORE_CONTINUE11]]: +; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT:    br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK:       [[MIDDLE_BLOCK]]: +; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK:       [[SCALAR_PH]]: +; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT:    br label %[[LOOP:.*]] +; CHECK:       [[LOOP]]: +; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT:    [[GEP_COND:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[IV]] +; CHECK-NEXT:    [[COND:%.*]] = load i32, ptr [[GEP_COND]], align 4 +; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[COND]], 0 +; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[LOOP_LATCH]] +; CHECK:       [[IF_THEN]]: +; CHECK-NEXT:    [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4 +; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT:    store i32 [[INV_VAL]], ptr [[GEP]], align 4 +; CHECK-NEXT:    br label %[[LOOP_LATCH]] +; CHECK:       [[LOOP_LATCH]]: +; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void +; +entry: +  br label %loop + +loop: +  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] +  %gep.cond = getelementptr inbounds i32, ptr %cond_ptr, i32 %iv +  %cond = load i32, ptr %gep.cond, align 4 +  %cmp = icmp sgt i32 %cond, 0 +  br i1 %cmp, label %if.then, label %loop.latch + +if.then: +  %inv_val = load i32, ptr %invariant_ptr, align 4 +  %gep = getelementptr inbounds i32, ptr %dst, i32 %iv +  store i32 %inv_val, ptr %gep, align 4 +  br label %loop.latch + +loop.latch: +  %iv.next = add nuw nsw i32 %iv, 1 +  %ec = icmp eq i32 %iv.next, %n +  br i1 %ec, label 
%exit, label %loop + +exit: +  ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll new file mode 100644 index 0000000..857b913 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll @@ -0,0 +1,100 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s + +define void @test_widen_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_widen_metadata' +; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK:      <x1> vector loop: { +; CHECK:        vector.body: +; CHECK:          WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK:          WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float +; CHECK:          WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00> +; CHECK:          WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32 +; CHECK:          WIDEN store vp<{{.*}}>, ir<%conv.back> +; +entry: +  br label %loop + +loop: +  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] +  %gep.A = getelementptr inbounds i32, ptr %A, i32 %i +  %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6 +  %conv = sitofp i32 %lv to float, !fpmath !5 +  %mul = fmul float %conv, 2.0, !fpmath !5 +  %conv.back = fptosi float %mul to i32 +  %gep.B = getelementptr inbounds i32, ptr %B, i32 %i +  store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0 +  %i.next = add i32 %i, 1 +  %cond = icmp eq i32 %i.next, %n +  br i1 %cond, label %exit, label %loop + +exit: +  ret void +} + +declare float @llvm.sqrt.f32(float) + +define void @test_intrinsic_with_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_intrinsic_with_metadata' +; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK:      <x1> vector loop: { +; CHECK:        vector.body: +; CHECK:          WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK:          WIDEN-INTRINSIC ir<%sqrt> = call llvm.sqrt(ir<%lv>) +; CHECK:          WIDEN store vp<{{.*}}>, ir<%sqrt> +; +entry: +  br label %loop + +loop: +  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] +  %gep.A = getelementptr inbounds float, ptr %A, i32 %i +  %lv = load float, ptr %gep.A, align 4, !tbaa !0 +  %sqrt = call float @llvm.sqrt.f32(float %lv), !fpmath !5 +  %gep.B = getelementptr inbounds float, ptr %B, i32 %i +  store float %sqrt, ptr %gep.B, align 4, !tbaa !0 +  %i.next = add i32 %i, 1 +  %cond = icmp eq i32 %i.next, %n +  br i1 %cond, label %exit, label %loop + +exit: +  ret void +} + +define void @test_widen_with_multiple_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_widen_with_multiple_metadata' +; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK:      <x1> vector loop: { +; CHECK:        vector.body: +; CHECK:          WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK:          WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float +; CHECK:          WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00> +; CHECK:          WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32 +; CHECK:          WIDEN store vp<{{.*}}>, ir<%conv.back> +; +entry: +  br label %loop + +loop: +  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] +  %gep.A = getelementptr inbounds i32, ptr %A, i32 %i +  %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6 +  %conv = sitofp i32 %lv to float +  %mul = fmul float %conv, 2.0 +  %conv.back = fptosi float %mul to i32 +  %gep.B = 
getelementptr inbounds i32, ptr %B, i32 %i +  store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0 +  %i.next = add i32 %i, 1 +  %cond = icmp eq i32 %i.next, %n +  br i1 %cond, label %exit, label %loop + +exit: +  ret void +} + +!0 = !{!1, !1, i64 0} +!1 = !{!"float", !2} +!2 = !{!"root"} +!5 = !{float 2.500000e+00} +!6 = !{i32 0, i32 100} diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 994e9c1..2dd6a04 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -29,11 +29,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3  ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION  ; CHECK-NEXT:   ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>  ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> +; CHECK-NEXT:   WIDEN ir<%cond> = icmp eq ir<%iv>, ir<%x> +; CHECK-NEXT:   EMIT vp<[[AND:%.+]]> = logical-and vp<[[MASK]]>, ir<%cond>  ; CHECK-NEXT: Successor(s): pred.store  ; CHECK:      <xVFxUF> pred.store: {  ; CHECK-NEXT:   pred.store.entry: -; CHECK-NEXT:     BRANCH-ON-MASK vp<[[MASK]]> +; CHECK-NEXT:     BRANCH-ON-MASK vp<[[AND]]>  ; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue  ; CHECK:      pred.store.if: @@ -50,24 +52,31 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3  ; CHECK-NEXT:   No successors  ; CHECK-NEXT: } -; CHECK:      loop.1: +; CHECK:      if.1:  ; CHECK-NEXT:   EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>  ; CHECK-NEXT:   EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>  ; CHECK-NEXT: No successors  ; CHECK-NEXT: }  ; -define void @sink1(i32 %k) { +define void @sink1(i32 %k, i32 %x) {  entry:    br label %loop  loop: -  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +  %iv = phi i32 [ 0, %entry ], [ %iv.next, %latch ] +  %cond = icmp eq i32 %iv, %x +  br i1 %cond, label %if, label %latch + +if:    %gep.b = getelementptr inbounds [2048 x i32], ptr @b, i32 0, i32 %iv    %lv.b  = load i32, ptr %gep.b, align 4    %add = add i32 %lv.b, 10    %mul = mul i32 2, %add    %gep.a = getelementptr inbounds [2048 x i32], ptr @a, i32 0, i32 %iv    store i32 %mul, ptr %gep.a, align 4 +  br label %latch + +latch:    %iv.next = add i32 %iv, 1    %large = icmp sge i32 %iv, 8    %exitcond = icmp eq i32 %iv, %k diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 67970c4..0b6c4f3 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -385,7 +385,7 @@ define internal void @.omp_outlined..4(ptr noalias %.global_tid., ptr noalias %.  
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4  ; CHECK-SAME: (ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]]) {  ; CHECK-NEXT:  entry: -; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META1:![0-9]+]]  ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])  ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0  ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] @@ -458,7 +458,7 @@ define internal void @.omp_outlined..5(ptr noalias %.global_tid., ptr noalias %.  ; CHECK-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]]) {  ; CHECK-NEXT:  entry:  ; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR19]] -; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META1]]  ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])  ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0  ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] @@ -534,7 +534,7 @@ define internal void @.omp_outlined..6(ptr noalias %.global_tid., ptr noalias %.  
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr noundef nonnull align 4 [[A1]]) #[[ATTR20:[0-9]+]]  ; CHECK-NEXT:    store i32 1, ptr [[A1]], align 4  ; CHECK-NEXT:    store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8 -; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META1]]  ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var)  ; CHECK-NEXT:    switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [  ; CHECK-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] @@ -646,10 +646,10 @@ define internal void @.omp.reduction.reduction_func(ptr %arg, ptr %arg1) {  ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func  ; CHECK-SAME: (ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[ARG:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {  ; CHECK-NEXT:  entry: -; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8 -; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8, !invariant.load [[META1]] +; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8, !invariant.load [[META1]] +; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !invariant.load [[META1]] +; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4, !invariant.load [[META1]]  ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]  ; CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4  ; CHECK-NEXT:    ret void diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll new file mode 100644 index 0000000..a35bcf1 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes='default<O3>' -S %s | FileCheck %s + +target triple = "arm64-apple-macosx" + +%"class.dealii::VectorizedArray" = type { [4 x double] } + +define void @hoist_invariant_load(ptr %invariant_ptr, i64 %num_elements, ptr %array) { +; CHECK-LABEL: define void @hoist_invariant_load( +; CHECK-SAME: ptr readonly captures(none) [[INVARIANT_PTR:%.*]], i64 [[NUM_ELEMENTS:%.*]], ptr captures(none) [[ARRAY:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT:  [[ENTRY:.*]]: +; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i64 [[NUM_ELEMENTS]], 0 +; CHECK-NEXT:    br i1 [[CMP1_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH:.*]] +; CHECK:       [[LOOP_LATCH]]: +; CHECK-NEXT:    [[I2:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nusw %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]] +; CHECK-NEXT:    [[INVARIANT_VAL:%.*]] = load double, ptr [[INVARIANT_PTR]], align 8 +; CHECK-NEXT:    [[ARRAY_VAL:%.*]] = load double, ptr [[GEP]], align 8 +; CHECK-NEXT:    [[SUM:%.*]] 
= fadd double [[INVARIANT_VAL]], [[ARRAY_VAL]] +; CHECK-NEXT:    store double [[SUM]], ptr [[GEP]], align 8 +; CHECK-NEXT:    [[I_NEXT]] = add nuw i64 [[I2]], 1 +; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[NUM_ELEMENTS]] +; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void +; +entry: +  br label %loop.header + +loop.header:                                      ; preds = %loop.latch, %entry +  %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] +  %cmp = icmp ult i64 %i, %num_elements +  br i1 %cmp, label %loop.latch, label %exit + +loop.latch:                                       ; preds = %loop.header +  %gep = getelementptr nusw %"class.dealii::VectorizedArray", ptr %array, i64 %i +  %invariant_val = load double, ptr %invariant_ptr, align 8 +  %array_val = load double, ptr %gep, align 8 +  %sum = fadd double %array_val, %invariant_val +  store double %sum, ptr %gep, align 8 +  %i.next = add i64 %i, 1 +  br label %loop.header + +exit:                                             ; preds = %loop.header +  ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-non-schedule-multi-use-in-binop.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-non-schedule-multi-use-in-binop.ll new file mode 100644 index 0000000..590b0be --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-non-schedule-multi-use-in-binop.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S --mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +@a = common global [100 x i64] zeroinitializer, align 64 + +define void @test() { +; CHECK-LABEL: define void @test() { +; CHECK-NEXT:  [[ENTRY:.*]]: +; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8 +; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[TMP0]], splat (i64 1) +; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 3> +; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 1) +; CHECK-NEXT:    br i1 false, label %[[LOP_RHSCNT_I_PEEL:.*]], label %[[LAND_END_I_PEEL:.*]] +; CHECK:       [[LOP_RHSCNT_I_PEEL]]: +; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP1]], <i64 1, i64 0> +; CHECK-NEXT:    br label %[[LAND_END_I_PEEL]] +; CHECK:       [[LAND_END_I_PEEL]]: +; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x i64> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP4]], %[[LOP_RHSCNT_I_PEEL]] ] +; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8 +; CHECK-NEXT:    ret void +; +entry: +  %.promoted104.i = load i64, ptr getelementptr inbounds nuw (i8, ptr @a, i64 56), align 8 +  %.promoted103.i = load i64, ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8 +  %0 = add i64 %.promoted104.i, 1 +  %1 = add i64 %.promoted103.i, 1 +  %2 = add i64 %0, 1 +  br i1 false, label %lop.rhscnt.i.peel, label %land.end.i.peel + +lop.rhscnt.i.peel: +  %3 = or i64 %1, 1 +  br label %land.end.i.peel + +land.end.i.peel: +  %4 = phi i64 [ %2, %entry ], [ %0, %lop.rhscnt.i.peel ] +  %5 = phi i64 [ %1, %entry ], [ %3, %lop.rhscnt.i.peel ] +  store i64 %5, ptr getelementptr inbounds nuw (i8, ptr @a, i64 48), align 8 +  store i64 %4, ptr getelementptr inbounds nuw (i8, ptr @a, i64 56), align 8 +  ret void +} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll index 1d89420..8716170 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll @@ -1,14 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals  ; RUN: opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s  declare void @clobber() -define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) { +define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {  ; CHECK-LABEL: @partial_unswitch_true_successor(  ; CHECK-NEXT:  entry:  ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4  ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 -; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]  ; CHECK:       entry.split.us:  ; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]  ; CHECK:       loop.header.us: @@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch.us:  ; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]  ; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1 -; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]  ; CHECK:       exit.split.us:  ; CHECK-NEXT:    br label [[EXIT:%.*]]  ; CHECK:       entry.split: @@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {  ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]  ; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[PTR]], align 4  ; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100 -; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] +; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]], !prof [[PROF1]]  ; CHECK:       noclobber:  ; CHECK-NEXT:    br label [[LOOP_LATCH]]  ; CHECK:       clobber: @@ -37,7 +37,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !prof [[PROF2]], !llvm.loop [[LOOP3:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -50,7 +50,7 @@ loop.header:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]    %lv = load i32, ptr %ptr    %sc = icmp eq i32 %lv, 100 -  br i1 %sc, label %noclobber, label %clobber +  br i1 %sc, label %noclobber, label %clobber, !prof !1  noclobber:    br label %loop.latch @@ -62,7 +62,7 @@ clobber:  loop.latch:    %c = icmp ult i32 %iv, %N    %iv.next = add i32 %iv, 1 -  br i1 %c, label %loop.header, label %exit +  br i1 %c, label %loop.header, label %exit, !prof !2  exit:    ret i32 10 @@ -102,7 +102,7 @@ define i32 @partial_unswitch_false_successor(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    
[[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP5:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -171,7 +171,7 @@ define i32 @partial_unswtich_gep_load_icmp(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP6:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -246,7 +246,7 @@ define i32 @partial_unswitch_reduction_phi(ptr %ptr, i32 %N) {  ; CHECK-NEXT:    [[RED_NEXT]] = phi i32 [ [[ADD_5]], [[CLOBBER]] ], [ [[ADD_10]], [[NOCLOBBER]] ]  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP_LATCH]] ]  ; CHECK-NEXT:    br label [[EXIT]] @@ -325,7 +325,7 @@ define i32 @partial_unswitch_true_successor_noclobber(ptr noalias %ptr.1, ptr no  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -637,7 +637,7 @@ define i32 @partial_unswitch_true_successor_preheader_insertion(ptr %ptr, i32 %N  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT_SPLIT:%.*]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT_SPLIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]]  ; CHECK:       exit.loopexit.split:  ; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]  ; CHECK:       exit.loopexit: @@ -713,7 +713,7 @@ define i32 @partial_unswitch_true_successor_insert_point(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP10:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -784,7 +784,7 @@ define i32 @partial_unswitch_true_successor_hoist_invariant(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], 
label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -1073,7 +1073,7 @@ define i32 @partial_unswitch_true_to_latch(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -1138,7 +1138,7 @@ define i32 @partial_unswitch_exiting_block_with_multiple_unswitch_candidates(i32  ; CHECK-NEXT:    store i32 [[TMP1:%.*]], ptr [[PTR]], align 16  ; CHECK-NEXT:    br label [[EXITING]]  ; CHECK:       exiting: -; CHECK-NEXT:    br i1 [[EXIT_COND]], label [[LOOP]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT:    br i1 [[EXIT_COND]], label [[LOOP]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    [[RET_VAL:%.*]] = phi i32 [ 1, [[EXITING]] ]  ; CHECK-NEXT:    br label [[EXIT]] @@ -1249,7 +1249,7 @@ define i32 @partial_unswitch_true_successor_for_cost_calculation(ptr %ptr, i32 %  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP14:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -1360,7 +1360,7 @@ define i32 @partial_unswitch_true_successor_trunc(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP15:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -1425,7 +1425,7 @@ define i32 @partial_unswitch_false_successor_trunc(ptr %ptr, i32 %N) {  ; CHECK:       loop.latch:  ; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]  ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP16:![0-9]+]]  ; CHECK:       exit.split:  ; CHECK-NEXT:    br label [[EXIT]]  ; CHECK:       exit: @@ -1456,15 +1456,26 @@ exit:    ret i32 10  } -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[UNSWITCH_PARTIAL_DISABLE:![0-9]+]]} -; CHECK: [[UNSWITCH_PARTIAL_DISABLE]] = !{!"llvm.loop.unswitch.partial.disable"} -; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: 
[[LOOP8]] = distinct !{[[LOOP8]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[UNSWITCH_PARTIAL_DISABLE]]} +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +!2 = !{!"branch_weights", i32 100, i32 3} + +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 100, i32 3} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +; CHECK: [[META4]] = !{!"llvm.loop.unswitch.partial.disable"} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META4]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]]} +; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META4]]} +; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]]} +; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]]} +;. diff --git a/llvm/test/Transforms/StructurizeCFG/callbr.ll b/llvm/test/Transforms/StructurizeCFG/callbr.ll new file mode 100644 index 0000000..42f9519 --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/callbr.ll @@ -0,0 +1,235 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s + +; Structurize as usual, but don't tear callbr and its destination blocks apart. +; +; Note: currently, callbr blocks and their corresponding target blocks +; themselves are not handled by the structurizer.* If the CFG turns out to be +; unstructured at the end, the CFG lowering (si-annotate-control-flow) will +; detect this. For the currently intended use cases of callbr in the context of +; the AMDGPU backend, this is not a limitation (cf. +; https://discourse.llvm.org/t/rfc-add-callbr-intrinsic-support/86087). +; +; Note 2: while callbr and its targets remain untouched, everything else is +; handled as usual, even if it is nested in a callbr region. +; +; *FIXME: this will be fixed in the future. Callbr can be handled as follows: +; Input IR: +; ``` +; define void @foo_callbr() { +;   callbr void asm "", "!i"() to label %fallthrough [label %indirect, ...] +; fallthrough: +;   br label %exit +; indirect: +;   br label %exit +; ... +; exit: +;   ret void +; } +; ``` +; +; Output IR: +; ``` +; define void @foo_callbr() { +;   callbr void asm "", "!i"() +;          to label %fallthrough [label %fake.indirect, label %fake.indirect1, label %fake.indirect2, ...] +; fake.indirect:                                    ; preds = %0 +;   br label %Flow +; fake.indirect1:                                   ; preds = %0 +;   br label %Flow +; fake.indirect2:                                   ; preds = %0 +;   br label %Flow +; ... 
+; Flow:                                             ; preds = %fallthrough, %fake.indirect[0-N] +;   %1 = phi i1 [ false, %fallthrough ], [ true, %fake.indirect ], [ false, %fake.indirect[1-N] ] +;   br i1 %1, label %indirect, label %Flow1 +; Flow1:                                            ; preds = %Flow, %indirect +;   %2 = phi i1 [ false, %Flow ], [ true, %fake.indirect1 ], [ false, %indirect ] +;   br i1 %2, label %indirect1, label %Flow2 +; Flow2:                                            ; preds = %Flow, %indirect1 +;   %3 = phi i1 [ false, %Flow ], [ true, %fake.indirect2 ], [ false, %indirect1 ] +;   br i1 %3, label %indirect2, label %Flow3 +; ... +; fallthrough:                                      ; preds = %0 +;   br label %Flow +; indirect:                                         ; preds = %Flow +;   br label %Flow1 +; indirect1:                                        ; preds = %Flow1 +;   br label %Flow2 +; indirect2:                                        ; preds = %Flow2 +;   br label %Flow3 +; ... +; exit:                                             ; preds = %indirectN, %FlowN +;   ret void +; } +; ``` +; +; Output IR as ASCII-art: +;          %0 +; --------------------- +; |     |     |     | +; v     v     v     v +; f    f.i   f.i1  f.i2 +; |     |     |     | +; v     v     v     v +; --------------------- +;        %Flow +;          |   \ +;          |    %indirect +;          |   / +;       %Flow1 +;          |   \ +;          |    %indirect1 +;          |   / +;       %Flow2 +;          |   \ +;          |    %indirect2 +;          |   / +;        %exit +; + +; Only callbr, nothing to do. +define void @callbr_simple() { +; CHECK-LABEL: define void @callbr_simple() { +; CHECK-NEXT:  [[CALLBR:.*:]] +; CHECK-NEXT:    callbr void asm "", "!i"() +; CHECK-NEXT:            to label %[[INDIRECT:.*]] [label %indirect] +; CHECK:       [[INDIRECT]]: +; CHECK-NEXT:    br label %[[EXIT:.*]] +; CHECK:       [[INDIRECT1:.*:]] +; CHECK-NEXT:    br label %[[EXIT]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void +; +callbr: +  callbr void asm "", "!i"() to label %fallthrough [label %indirect] +fallthrough: +  br label %exit +indirect: +  br label %exit +exit: +  ret void +} + +; Callbr nested in non-callbr: non-callbr is transformed +define void @callbr_in_non_callbr(i1 %c) { +; CHECK-LABEL: define void @callbr_in_non_callbr( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT:    [[C_INV:%.*]] = xor i1 [[C]], true +; CHECK-NEXT:    br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW:.*]] +; CHECK:       [[FLOW]]: +; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %[[NOCALLBR]] ], [ true, [[TMP0:%.*]] ] +; CHECK-NEXT:    br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[EXIT:.*]] +; CHECK:       [[CALLBR]]: +; CHECK-NEXT:    callbr void asm "", "!i"() +; CHECK-NEXT:            to label %[[INDIRECT:.*]] [label %indirect] +; CHECK:       [[INDIRECT]]: +; CHECK-NEXT:    br label %[[EXIT]] +; CHECK:       [[INDIRECT1:.*:]] +; CHECK-NEXT:    br label %[[EXIT]] +; CHECK:       [[NOCALLBR]]: +; CHECK-NEXT:    br label %[[FLOW]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void +; +  br i1 %c, label %callbr, label %nocallbr +callbr: +  callbr void asm "", "!i"() to label %fallthrough [label %indirect] +fallthrough: +  br label %exit +indirect: +  br label %exit +nocallbr: +  br label %exit +exit: +  ret void +} + +; Callbr parent of non-callbr: non-callbr is transformed +define void @non_callbr_in_callbr(i1 %c) { +; CHECK-LABEL: define void @non_callbr_in_callbr( +; CHECK-SAME: i1 
[[C:%.*]]) { +; CHECK-NEXT:    [[C_INV:%.*]] = xor i1 [[C]], true +; CHECK-NEXT:    callbr void asm "", "!i"() +; CHECK-NEXT:            to label %[[INDIRECT:.*]] [label %indirect] +; CHECK:       [[INDIRECT]]: +; CHECK-NEXT:    br i1 [[C_INV]], label %[[FALLTHROUGH2:.*]], label %[[FLOW:.*]] +; CHECK:       [[FLOW]]: +; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %[[FALLTHROUGH2]] ], [ true, %[[INDIRECT]] ] +; CHECK-NEXT:    br i1 [[TMP1]], label %[[FALLTHROUGH1:.*]], label %[[FLOW1:.*]] +; CHECK:       [[FALLTHROUGH1]]: +; CHECK-NEXT:    br label %[[FLOW1]] +; CHECK:       [[FALLTHROUGH2]]: +; CHECK-NEXT:    br label %[[FLOW]] +; CHECK:       [[INDIRECT1:.*:]] +; CHECK-NEXT:    br label %[[EXIT:.*]] +; CHECK:       [[FLOW1]]: +; CHECK-NEXT:    br label %[[EXIT]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void +; +  callbr void asm "", "!i"() to label %fallthrough [label %indirect] +fallthrough: +  br i1 %c, label %fallthrough1, label %fallthrough2 +fallthrough1: +  br label %exit +fallthrough2: +  br label %exit +indirect: +  br label %exit +exit: +  ret void +} + +; Callbr surrounded by non-callbr: all three regular branches are handled +; correctly +define void @callbr_nested_in_non_callbr(i1 %c, i1 %d, i1 %e, i1 %f) { +; CHECK-LABEL: define void @callbr_nested_in_non_callbr( +; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]], i1 [[E:%.*]], i1 [[F:%.*]]) { +; CHECK-NEXT:    [[C_INV:%.*]] = xor i1 [[C]], true +; CHECK-NEXT:    br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW3:.*]] +; CHECK:       [[FLOW3]]: +; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %[[FLOW:.*]] ], [ true, [[TMP0:%.*]] ] +; CHECK-NEXT:    br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[RET:.*]] +; CHECK:       [[CALLBR]]: +; CHECK-NEXT:    callbr void asm "", "!i"() +; CHECK-NEXT:            to label %[[INDIRECT:.*]] [label %indirect] +; CHECK:       [[INDIRECT]]: +; CHECK-NEXT:    br i1 [[D]], label %[[FALLTHROUGH1:.*]], label %[[FLOW2:.*]] +; CHECK:       [[FALLTHROUGH1]]: +; CHECK-NEXT:    br label %[[FLOW2]] +; CHECK:       [[INDIRECT2:.*:]] +; CHECK-NEXT:    br i1 [[E]], label %[[INDIRECT1:.*]], label %[[FLOW1:.*]] +; CHECK:       [[INDIRECT1]]: +; CHECK-NEXT:    br label %[[FLOW1]] +; CHECK:       [[NOCALLBR]]: +; CHECK-NEXT:    br i1 [[F]], label %[[NOCALLBR1:.*]], label %[[FLOW]] +; CHECK:       [[NOCALLBR1]]: +; CHECK-NEXT:    br label %[[FLOW]] +; CHECK:       [[FLOW]]: +; CHECK-NEXT:    br label %[[FLOW3]] +; CHECK:       [[FLOW1]]: +; CHECK-NEXT:    br label %[[RET]] +; CHECK:       [[FLOW2]]: +; CHECK-NEXT:    br label %[[RET]] +; CHECK:       [[RET]]: +; CHECK-NEXT:    ret void +; +  br i1 %c, label %callbr, label %nocallbr +callbr: +  callbr void asm "", "!i"() to label %fallthrough [label %indirect] +fallthrough: +  br i1 %d, label %fallthrough1, label %ret +fallthrough1: +  br label %ret +indirect: +  br i1 %e, label %indirect1, label %ret +indirect1: +  br label %ret +nocallbr: +  br i1 %f, label %nocallbr1, label %ret +nocallbr1: +  br label %ret +ret: +  ret void +} diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll new file mode 100644 index 0000000..4b551fa --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll @@ -0,0 +1,567 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx1100 -passes=vector-combine < %s | FileCheck -check-prefix=OPT %s 
+ +; Generated from amdgpu-promote-alloca on array of vectors +; VectorCombiner should recognize chain of extract-insert vectors +; and turn them into one or two shuffles +define amdgpu_kernel void @extract_insert_chain_to_shuffles(<16 x i8> %in, <16 x i8> %add, ptr addrspace(3) %out) #0 { +; OPT-LABEL: define amdgpu_kernel void @extract_insert_chain_to_shuffles( +; OPT-SAME: <16 x i8> [[IN:%.*]], <16 x i8> [[ADD:%.*]], ptr addrspace(3) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-NEXT:  [[ENTRY:.*:]] +; OPT-NEXT:    [[ALLOCA:%.*]] = freeze <128 x i8> poison +; OPT-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP1:%.*]] = shufflevector <128 x i8> [[ALLOCA]], <128 x i8> [[TMP0]], <128 x i32> <i32 128, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP3:%.*]] = insertelement <128 x i8> [[TMP1]], i8 [[TMP2]], i32 1 +; OPT-NEXT:    [[TMP4:%.*]] = extractelement <16 x i8> [[IN]], i64 
2 +; OPT-NEXT:    [[TMP5:%.*]] = insertelement <128 x i8> [[TMP3]], i8 [[TMP4]], i32 2 +; OPT-NEXT:    [[TMP6:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP7:%.*]] = insertelement <128 x i8> [[TMP5]], i8 [[TMP6]], i32 3 +; OPT-NEXT:    [[TMP8:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT:    [[TMP9:%.*]] = insertelement <128 x i8> [[TMP7]], i8 [[TMP8]], i32 4 +; OPT-NEXT:    [[TMP10:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP11:%.*]] = insertelement <128 x i8> [[TMP9]], i8 [[TMP10]], i32 5 +; OPT-NEXT:    [[TMP12:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP13:%.*]] = insertelement <128 x i8> [[TMP11]], i8 [[TMP12]], i32 6 +; OPT-NEXT:    [[TMP14:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP15:%.*]] = insertelement <128 x i8> [[TMP13]], i8 [[TMP14]], i32 7 +; OPT-NEXT:    [[TMP16:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP17:%.*]] = insertelement <128 x i8> [[TMP15]], i8 [[TMP16]], i32 8 +; OPT-NEXT:    [[TMP18:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP19:%.*]] = insertelement <128 x i8> [[TMP17]], i8 [[TMP18]], i32 9 +; OPT-NEXT:    [[TMP20:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP21:%.*]] = insertelement <128 x i8> [[TMP19]], i8 [[TMP20]], i32 10 +; OPT-NEXT:    [[TMP22:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP23:%.*]] = insertelement <128 x i8> [[TMP21]], i8 [[TMP22]], i32 11 +; OPT-NEXT:    [[TMP24:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP25:%.*]] = insertelement <128 x i8> [[TMP23]], i8 [[TMP24]], i32 12 +; OPT-NEXT:    [[TMP26:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP27:%.*]] = insertelement <128 x i8> [[TMP25]], i8 [[TMP26]], i32 13 +; OPT-NEXT:    [[TMP28:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    [[TMP29:%.*]] = insertelement <128 x i8> [[TMP27]], i8 [[TMP28]], i32 14 +; OPT-NEXT:    [[TMP30:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP31:%.*]] = insertelement <128 x i8> [[TMP29]], i8 [[TMP30]], i32 15 +; OPT-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP33:%.*]] = shufflevector <128 x i8> [[TMP31]], <128 x i8> [[TMP32]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 128, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP34:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP35:%.*]] = insertelement <128 x i8> [[TMP33]], i8 [[TMP34]], i32 17 +; OPT-NEXT:    [[TMP36:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT:    [[TMP37:%.*]] = insertelement <128 x i8> [[TMP35]], i8 [[TMP36]], i32 18 +; OPT-NEXT:    [[TMP38:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP39:%.*]] = insertelement <128 x i8> [[TMP37]], i8 [[TMP38]], i32 19 +; OPT-NEXT:    [[TMP40:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT:    [[TMP41:%.*]] = insertelement <128 x i8> [[TMP39]], i8 [[TMP40]], i32 20 +; OPT-NEXT:    [[TMP42:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP43:%.*]] = insertelement <128 x i8> [[TMP41]], i8 [[TMP42]], i32 21 +; OPT-NEXT:    [[TMP44:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP45:%.*]] = insertelement <128 x i8> [[TMP43]], i8 [[TMP44]], i32 22 +; OPT-NEXT:    [[TMP46:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP47:%.*]] = insertelement <128 x i8> [[TMP45]], i8 [[TMP46]], i32 23 +; OPT-NEXT:    [[TMP48:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP49:%.*]] = insertelement <128 x i8> [[TMP47]], i8 [[TMP48]], i32 24 +; OPT-NEXT:    [[TMP50:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP51:%.*]] = insertelement <128 x i8> [[TMP49]], i8 [[TMP50]], i32 25 +; OPT-NEXT:    [[TMP52:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP53:%.*]] = insertelement <128 x i8> [[TMP51]], i8 [[TMP52]], i32 26 +; OPT-NEXT:    [[TMP54:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP55:%.*]] = insertelement <128 x i8> [[TMP53]], i8 [[TMP54]], i32 27 +; OPT-NEXT:    [[TMP56:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP57:%.*]] = insertelement <128 x i8> [[TMP55]], i8 [[TMP56]], i32 28 +; OPT-NEXT:    [[TMP58:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP59:%.*]] = insertelement <128 x i8> [[TMP57]], i8 [[TMP58]], i32 29 +; OPT-NEXT:    [[TMP60:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    
[[TMP61:%.*]] = insertelement <128 x i8> [[TMP59]], i8 [[TMP60]], i32 30 +; OPT-NEXT:    [[TMP62:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP63:%.*]] = insertelement <128 x i8> [[TMP61]], i8 [[TMP62]], i32 31 +; OPT-NEXT:    [[TMP64:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP65:%.*]] = shufflevector <128 x i8> [[TMP63]], <128 x i8> [[TMP64]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 128, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP66:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP67:%.*]] = insertelement <128 x i8> [[TMP65]], i8 [[TMP66]], i32 33 +; OPT-NEXT:    [[TMP68:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT:    [[TMP69:%.*]] = insertelement <128 x i8> [[TMP67]], i8 [[TMP68]], i32 34 +; OPT-NEXT:    [[TMP70:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP71:%.*]] = insertelement <128 x i8> [[TMP69]], i8 [[TMP70]], i32 35 +; OPT-NEXT:    [[TMP72:%.*]] = extractelement <16 x i8> [[IN]], i64 
4 +; OPT-NEXT:    [[TMP73:%.*]] = insertelement <128 x i8> [[TMP71]], i8 [[TMP72]], i32 36 +; OPT-NEXT:    [[TMP74:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP75:%.*]] = insertelement <128 x i8> [[TMP73]], i8 [[TMP74]], i32 37 +; OPT-NEXT:    [[TMP76:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP77:%.*]] = insertelement <128 x i8> [[TMP75]], i8 [[TMP76]], i32 38 +; OPT-NEXT:    [[TMP78:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP79:%.*]] = insertelement <128 x i8> [[TMP77]], i8 [[TMP78]], i32 39 +; OPT-NEXT:    [[TMP80:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP81:%.*]] = insertelement <128 x i8> [[TMP79]], i8 [[TMP80]], i32 40 +; OPT-NEXT:    [[TMP82:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP83:%.*]] = insertelement <128 x i8> [[TMP81]], i8 [[TMP82]], i32 41 +; OPT-NEXT:    [[TMP84:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP85:%.*]] = insertelement <128 x i8> [[TMP83]], i8 [[TMP84]], i32 42 +; OPT-NEXT:    [[TMP86:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP87:%.*]] = insertelement <128 x i8> [[TMP85]], i8 [[TMP86]], i32 43 +; OPT-NEXT:    [[TMP88:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP89:%.*]] = insertelement <128 x i8> [[TMP87]], i8 [[TMP88]], i32 44 +; OPT-NEXT:    [[TMP90:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP91:%.*]] = insertelement <128 x i8> [[TMP89]], i8 [[TMP90]], i32 45 +; OPT-NEXT:    [[TMP92:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    [[TMP93:%.*]] = insertelement <128 x i8> [[TMP91]], i8 [[TMP92]], i32 46 +; OPT-NEXT:    [[TMP94:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP95:%.*]] = insertelement <128 x i8> [[TMP93]], i8 [[TMP94]], i32 47 +; OPT-NEXT:    [[TMP96:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP97:%.*]] = shufflevector <128 x i8> [[TMP95]], 
<128 x i8> [[TMP96]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 128, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP98:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP99:%.*]] = insertelement <128 x i8> [[TMP97]], i8 [[TMP98]], i32 49 +; OPT-NEXT:    [[TMP100:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT:    [[TMP101:%.*]] = insertelement <128 x i8> [[TMP99]], i8 [[TMP100]], i32 50 +; OPT-NEXT:    [[TMP102:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP103:%.*]] = insertelement <128 x i8> [[TMP101]], i8 [[TMP102]], i32 51 +; OPT-NEXT:    [[TMP104:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT:    [[TMP105:%.*]] = insertelement <128 x i8> [[TMP103]], i8 [[TMP104]], i32 52 +; OPT-NEXT:    [[TMP106:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP107:%.*]] = insertelement <128 x i8> [[TMP105]], i8 [[TMP106]], i32 53 +; OPT-NEXT:    [[TMP108:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP109:%.*]] = insertelement <128 x i8> [[TMP107]], i8 [[TMP108]], i32 54 +; OPT-NEXT:    [[TMP110:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP111:%.*]] = insertelement <128 x i8> [[TMP109]], i8 [[TMP110]], i32 55 +; OPT-NEXT:    [[TMP112:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP113:%.*]] = insertelement <128 x i8> [[TMP111]], i8 [[TMP112]], i32 56 +; OPT-NEXT:    [[TMP114:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP115:%.*]] = insertelement <128 x i8> [[TMP113]], i8 [[TMP114]], i32 57 +; OPT-NEXT:    [[TMP116:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP117:%.*]] = insertelement <128 x i8> [[TMP115]], i8 [[TMP116]], i32 58 +; OPT-NEXT:    [[TMP118:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP119:%.*]] = insertelement <128 x i8> [[TMP117]], i8 [[TMP118]], i32 59 +; OPT-NEXT:    [[TMP120:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP121:%.*]] = insertelement <128 x i8> [[TMP119]], i8 [[TMP120]], i32 60 +; OPT-NEXT:    [[TMP122:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP123:%.*]] = insertelement <128 x i8> [[TMP121]], i8 [[TMP122]], i32 61 +; OPT-NEXT:    [[TMP124:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    [[TMP125:%.*]] = insertelement <128 x i8> [[TMP123]], i8 [[TMP124]], i32 62 +; OPT-NEXT:    [[TMP126:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP127:%.*]] = insertelement <128 x i8> [[TMP125]], i8 [[TMP126]], i32 63 +; OPT-NEXT:    
[[TMP128:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP129:%.*]] = shufflevector <128 x i8> [[TMP127]], <128 x i8> [[TMP128]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 128, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP130:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP131:%.*]] = insertelement <128 x i8> [[TMP129]], i8 [[TMP130]], i32 65 +; OPT-NEXT:    [[TMP132:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT:    [[TMP133:%.*]] = insertelement <128 x i8> [[TMP131]], i8 [[TMP132]], i32 66 +; OPT-NEXT:    [[TMP134:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP135:%.*]] = insertelement <128 x i8> [[TMP133]], i8 [[TMP134]], i32 67 +; OPT-NEXT:    [[TMP136:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT:    [[TMP137:%.*]] = insertelement <128 x i8> [[TMP135]], i8 [[TMP136]], i32 68 +; OPT-NEXT:    [[TMP138:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP139:%.*]] = insertelement <128 x i8> 
[[TMP137]], i8 [[TMP138]], i32 69 +; OPT-NEXT:    [[TMP140:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP141:%.*]] = insertelement <128 x i8> [[TMP139]], i8 [[TMP140]], i32 70 +; OPT-NEXT:    [[TMP142:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP143:%.*]] = insertelement <128 x i8> [[TMP141]], i8 [[TMP142]], i32 71 +; OPT-NEXT:    [[TMP144:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP145:%.*]] = insertelement <128 x i8> [[TMP143]], i8 [[TMP144]], i32 72 +; OPT-NEXT:    [[TMP146:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP147:%.*]] = insertelement <128 x i8> [[TMP145]], i8 [[TMP146]], i32 73 +; OPT-NEXT:    [[TMP148:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP149:%.*]] = insertelement <128 x i8> [[TMP147]], i8 [[TMP148]], i32 74 +; OPT-NEXT:    [[TMP150:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP151:%.*]] = insertelement <128 x i8> [[TMP149]], i8 [[TMP150]], i32 75 +; OPT-NEXT:    [[TMP152:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP153:%.*]] = insertelement <128 x i8> [[TMP151]], i8 [[TMP152]], i32 76 +; OPT-NEXT:    [[TMP154:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP155:%.*]] = insertelement <128 x i8> [[TMP153]], i8 [[TMP154]], i32 77 +; OPT-NEXT:    [[TMP156:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    [[TMP157:%.*]] = insertelement <128 x i8> [[TMP155]], i8 [[TMP156]], i32 78 +; OPT-NEXT:    [[TMP158:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP159:%.*]] = insertelement <128 x i8> [[TMP157]], i8 [[TMP158]], i32 79 +; OPT-NEXT:    [[TMP160:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP161:%.*]] = shufflevector <128 x i8> [[TMP159]], <128 x i8> [[TMP160]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, 
i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 128, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP162:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP163:%.*]] = insertelement <128 x i8> [[TMP161]], i8 [[TMP162]], i32 81 +; OPT-NEXT:    [[TMP164:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT:    [[TMP165:%.*]] = insertelement <128 x i8> [[TMP163]], i8 [[TMP164]], i32 82 +; OPT-NEXT:    [[TMP166:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP167:%.*]] = insertelement <128 x i8> [[TMP165]], i8 [[TMP166]], i32 83 +; OPT-NEXT:    [[TMP168:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT:    [[TMP169:%.*]] = insertelement <128 x i8> [[TMP167]], i8 [[TMP168]], i32 84 +; OPT-NEXT:    [[TMP170:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP171:%.*]] = insertelement <128 x i8> [[TMP169]], i8 [[TMP170]], i32 85 +; OPT-NEXT:    [[TMP172:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP173:%.*]] = insertelement <128 x i8> [[TMP171]], i8 [[TMP172]], i32 86 +; OPT-NEXT:    [[TMP174:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP175:%.*]] = insertelement <128 x i8> [[TMP173]], i8 [[TMP174]], i32 87 +; OPT-NEXT:    [[TMP176:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP177:%.*]] = insertelement <128 x i8> [[TMP175]], i8 [[TMP176]], i32 88 +; OPT-NEXT:    [[TMP178:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP179:%.*]] = insertelement <128 x i8> [[TMP177]], i8 [[TMP178]], i32 89 +; OPT-NEXT:    [[TMP180:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP181:%.*]] = insertelement <128 x i8> [[TMP179]], i8 [[TMP180]], i32 90 +; OPT-NEXT:    [[TMP182:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP183:%.*]] = insertelement <128 x i8> [[TMP181]], i8 [[TMP182]], i32 91 +; OPT-NEXT:    [[TMP184:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP185:%.*]] = insertelement <128 x i8> [[TMP183]], i8 [[TMP184]], i32 92 +; OPT-NEXT:    [[TMP186:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP187:%.*]] = insertelement <128 x i8> [[TMP185]], i8 [[TMP186]], i32 93 +; OPT-NEXT:    [[TMP188:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    [[TMP189:%.*]] = insertelement <128 x i8> [[TMP187]], i8 [[TMP188]], i32 94 +; OPT-NEXT:    [[TMP190:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP191:%.*]] = insertelement <128 x i8> [[TMP189]], i8 [[TMP190]], i32 95 +; OPT-NEXT:    [[TMP192:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP193:%.*]] = shufflevector <128 x i8> [[TMP191]], <128 x i8> [[TMP192]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 128, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP194:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP195:%.*]] = insertelement <128 x i8> [[TMP193]], i8 [[TMP194]], i32 97 +; OPT-NEXT:    [[TMP196:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT:    [[TMP197:%.*]] = insertelement <128 x i8> [[TMP195]], i8 [[TMP196]], i32 98 +; OPT-NEXT:    [[TMP198:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP199:%.*]] = insertelement <128 x i8> [[TMP197]], i8 [[TMP198]], i32 99 +; OPT-NEXT:    [[TMP200:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT:    [[TMP201:%.*]] = insertelement <128 x i8> [[TMP199]], i8 [[TMP200]], i32 100 +; OPT-NEXT:    [[TMP202:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP203:%.*]] = insertelement <128 x i8> [[TMP201]], i8 [[TMP202]], i32 101 +; OPT-NEXT:    [[TMP204:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP205:%.*]] = insertelement <128 x i8> [[TMP203]], 
i8 [[TMP204]], i32 102 +; OPT-NEXT:    [[TMP206:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP207:%.*]] = insertelement <128 x i8> [[TMP205]], i8 [[TMP206]], i32 103 +; OPT-NEXT:    [[TMP208:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP209:%.*]] = insertelement <128 x i8> [[TMP207]], i8 [[TMP208]], i32 104 +; OPT-NEXT:    [[TMP210:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP211:%.*]] = insertelement <128 x i8> [[TMP209]], i8 [[TMP210]], i32 105 +; OPT-NEXT:    [[TMP212:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP213:%.*]] = insertelement <128 x i8> [[TMP211]], i8 [[TMP212]], i32 106 +; OPT-NEXT:    [[TMP214:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP215:%.*]] = insertelement <128 x i8> [[TMP213]], i8 [[TMP214]], i32 107 +; OPT-NEXT:    [[TMP216:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP217:%.*]] = insertelement <128 x i8> [[TMP215]], i8 [[TMP216]], i32 108 +; OPT-NEXT:    [[TMP218:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP219:%.*]] = insertelement <128 x i8> [[TMP217]], i8 [[TMP218]], i32 109 +; OPT-NEXT:    [[TMP220:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    [[TMP221:%.*]] = insertelement <128 x i8> [[TMP219]], i8 [[TMP220]], i32 110 +; OPT-NEXT:    [[TMP222:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP223:%.*]] = insertelement <128 x i8> [[TMP221]], i8 [[TMP222]], i32 111 +; OPT-NEXT:    [[TMP224:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP225:%.*]] = shufflevector <128 x i8> [[TMP223]], <128 x i8> [[TMP224]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 
39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 128, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127> +; OPT-NEXT:    [[TMP226:%.*]] = extractelement <16 x i8> [[IN]], i64 1 +; OPT-NEXT:    [[TMP227:%.*]] = insertelement <128 x i8> [[TMP225]], i8 [[TMP226]], i32 113 +; OPT-NEXT:    [[TMP228:%.*]] = extractelement <16 x i8> [[IN]], i64 2 +; OPT-NEXT:    [[TMP229:%.*]] = insertelement <128 x i8> [[TMP227]], i8 [[TMP228]], i32 114 +; OPT-NEXT:    [[TMP230:%.*]] = extractelement <16 x i8> [[IN]], i64 3 +; OPT-NEXT:    [[TMP231:%.*]] = insertelement <128 x i8> [[TMP229]], i8 [[TMP230]], i32 115 +; OPT-NEXT:    [[TMP232:%.*]] = extractelement <16 x i8> [[IN]], i64 4 +; OPT-NEXT:    [[TMP233:%.*]] = insertelement <128 x i8> [[TMP231]], i8 [[TMP232]], i32 116 +; OPT-NEXT:    [[TMP234:%.*]] = extractelement <16 x i8> [[IN]], i64 5 +; OPT-NEXT:    [[TMP235:%.*]] = insertelement <128 x i8> [[TMP233]], i8 [[TMP234]], i32 117 +; OPT-NEXT:    [[TMP236:%.*]] = extractelement <16 x i8> [[IN]], i64 6 +; OPT-NEXT:    [[TMP237:%.*]] = insertelement <128 x i8> [[TMP235]], i8 [[TMP236]], i32 118 +; OPT-NEXT:    [[TMP238:%.*]] = extractelement <16 x i8> [[IN]], i64 7 +; OPT-NEXT:    [[TMP239:%.*]] = insertelement <128 x i8> [[TMP237]], i8 [[TMP238]], i32 119 +; OPT-NEXT:    [[TMP240:%.*]] = extractelement <16 x i8> [[IN]], i64 8 +; OPT-NEXT:    [[TMP241:%.*]] = insertelement <128 x i8> [[TMP239]], i8 [[TMP240]], i32 120 +; OPT-NEXT:    [[TMP242:%.*]] = extractelement <16 x i8> [[IN]], i64 9 +; OPT-NEXT:    [[TMP243:%.*]] = insertelement <128 x i8> [[TMP241]], i8 [[TMP242]], i32 121 +; OPT-NEXT:    [[TMP244:%.*]] = extractelement <16 x i8> [[IN]], i64 10 +; OPT-NEXT:    [[TMP245:%.*]] = insertelement <128 x i8> [[TMP243]], i8 [[TMP244]], i32 122 +; OPT-NEXT:    [[TMP246:%.*]] = extractelement <16 x i8> [[IN]], i64 11 +; OPT-NEXT:    [[TMP247:%.*]] = insertelement <128 x i8> [[TMP245]], i8 [[TMP246]], i32 123 +; OPT-NEXT:    [[TMP248:%.*]] = extractelement <16 x i8> [[IN]], i64 12 +; OPT-NEXT:    [[TMP249:%.*]] = insertelement <128 x i8> [[TMP247]], i8 [[TMP248]], i32 124 +; OPT-NEXT:    [[TMP250:%.*]] = extractelement <16 x i8> [[IN]], i64 13 +; OPT-NEXT:    [[TMP251:%.*]] = insertelement <128 x i8> [[TMP249]], i8 [[TMP250]], i32 125 +; OPT-NEXT:    [[TMP252:%.*]] = extractelement <16 x i8> [[IN]], i64 14 +; OPT-NEXT:    [[TMP253:%.*]] = insertelement <128 x i8> [[TMP251]], i8 [[TMP252]], i32 126 +; OPT-NEXT:    [[TMP254:%.*]] = extractelement <16 x i8> [[IN]], i64 15 +; OPT-NEXT:    [[TMP255:%.*]] = insertelement <128 x i8> [[TMP253]], i8 [[TMP254]], i32 127 +; OPT-NEXT:    [[TMP256:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; OPT-NEXT:    [[TMP257:%.*]] = insertelement <16 x 
i8> [[TMP256]], i8 [[TMP162]], i64 1 +; OPT-NEXT:    [[TMP258:%.*]] = insertelement <16 x i8> [[TMP257]], i8 [[TMP164]], i64 2 +; OPT-NEXT:    [[TMP259:%.*]] = insertelement <16 x i8> [[TMP258]], i8 [[TMP166]], i64 3 +; OPT-NEXT:    [[TMP260:%.*]] = insertelement <16 x i8> [[TMP259]], i8 [[TMP168]], i64 4 +; OPT-NEXT:    [[TMP261:%.*]] = insertelement <16 x i8> [[TMP260]], i8 [[TMP170]], i64 5 +; OPT-NEXT:    [[TMP262:%.*]] = insertelement <16 x i8> [[TMP261]], i8 [[TMP172]], i64 6 +; OPT-NEXT:    [[TMP263:%.*]] = insertelement <16 x i8> [[TMP262]], i8 [[TMP174]], i64 7 +; OPT-NEXT:    [[TMP264:%.*]] = insertelement <16 x i8> [[TMP263]], i8 [[TMP176]], i64 8 +; OPT-NEXT:    [[TMP265:%.*]] = insertelement <16 x i8> [[TMP264]], i8 [[TMP178]], i64 9 +; OPT-NEXT:    [[TMP266:%.*]] = insertelement <16 x i8> [[TMP265]], i8 [[TMP180]], i64 10 +; OPT-NEXT:    [[TMP267:%.*]] = insertelement <16 x i8> [[TMP266]], i8 [[TMP182]], i64 11 +; OPT-NEXT:    [[TMP268:%.*]] = insertelement <16 x i8> [[TMP267]], i8 [[TMP184]], i64 12 +; OPT-NEXT:    [[TMP269:%.*]] = insertelement <16 x i8> [[TMP268]], i8 [[TMP186]], i64 13 +; OPT-NEXT:    [[TMP270:%.*]] = insertelement <16 x i8> [[TMP269]], i8 [[TMP188]], i64 14 +; OPT-NEXT:    [[TMP271:%.*]] = shufflevector <16 x i8> [[TMP270]], <16 x i8> [[IN]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31> +; OPT-NEXT:    [[SUM:%.*]] = add <16 x i8> [[TMP271]], [[ADD]] +; OPT-NEXT:    store <16 x i8> [[SUM]], ptr addrspace(3) [[OUT]], align 16 +; OPT-NEXT:    ret void +; +entry: +  %alloca = freeze <128 x i8> poison +  %0 = extractelement <16 x i8> %in, i64 0 +  %1 = insertelement <128 x i8> %alloca, i8 %0, i32 0 +  %2 = extractelement <16 x i8> %in, i64 1 +  %3 = insertelement <128 x i8> %1, i8 %2, i32 1 +  %4 = extractelement <16 x i8> %in, i64 2 +  %5 = insertelement <128 x i8> %3, i8 %4, i32 2 +  %6 = extractelement <16 x i8> %in, i64 3 +  %7 = insertelement <128 x i8> %5, i8 %6, i32 3 +  %8 = extractelement <16 x i8> %in, i64 4 +  %9 = insertelement <128 x i8> %7, i8 %8, i32 4 +  %10 = extractelement <16 x i8> %in, i64 5 +  %11 = insertelement <128 x i8> %9, i8 %10, i32 5 +  %12 = extractelement <16 x i8> %in, i64 6 +  %13 = insertelement <128 x i8> %11, i8 %12, i32 6 +  %14 = extractelement <16 x i8> %in, i64 7 +  %15 = insertelement <128 x i8> %13, i8 %14, i32 7 +  %16 = extractelement <16 x i8> %in, i64 8 +  %17 = insertelement <128 x i8> %15, i8 %16, i32 8 +  %18 = extractelement <16 x i8> %in, i64 9 +  %19 = insertelement <128 x i8> %17, i8 %18, i32 9 +  %20 = extractelement <16 x i8> %in, i64 10 +  %21 = insertelement <128 x i8> %19, i8 %20, i32 10 +  %22 = extractelement <16 x i8> %in, i64 11 +  %23 = insertelement <128 x i8> %21, i8 %22, i32 11 +  %24 = extractelement <16 x i8> %in, i64 12 +  %25 = insertelement <128 x i8> %23, i8 %24, i32 12 +  %26 = extractelement <16 x i8> %in, i64 13 +  %27 = insertelement <128 x i8> %25, i8 %26, i32 13 +  %28 = extractelement <16 x i8> %in, i64 14 +  %29 = insertelement <128 x i8> %27, i8 %28, i32 14 +  %30 = extractelement <16 x i8> %in, i64 15 +  %31 = insertelement <128 x i8> %29, i8 %30, i32 15 +  %32 = extractelement <16 x i8> %in, i64 0 +  %33 = insertelement <128 x i8> %31, i8 %32, i32 16 +  %34 = extractelement <16 x i8> %in, i64 1 +  %35 = insertelement <128 x i8> %33, i8 %34, i32 17 +  %36 = extractelement <16 x i8> %in, i64 2 +  %37 = insertelement <128 x i8> %35, i8 %36, i32 18 +  %38 = extractelement <16 x i8> %in, i64 3 +  
%39 = insertelement <128 x i8> %37, i8 %38, i32 19 +  %40 = extractelement <16 x i8> %in, i64 4 +  %41 = insertelement <128 x i8> %39, i8 %40, i32 20 +  %42 = extractelement <16 x i8> %in, i64 5 +  %43 = insertelement <128 x i8> %41, i8 %42, i32 21 +  %44 = extractelement <16 x i8> %in, i64 6 +  %45 = insertelement <128 x i8> %43, i8 %44, i32 22 +  %46 = extractelement <16 x i8> %in, i64 7 +  %47 = insertelement <128 x i8> %45, i8 %46, i32 23 +  %48 = extractelement <16 x i8> %in, i64 8 +  %49 = insertelement <128 x i8> %47, i8 %48, i32 24 +  %50 = extractelement <16 x i8> %in, i64 9 +  %51 = insertelement <128 x i8> %49, i8 %50, i32 25 +  %52 = extractelement <16 x i8> %in, i64 10 +  %53 = insertelement <128 x i8> %51, i8 %52, i32 26 +  %54 = extractelement <16 x i8> %in, i64 11 +  %55 = insertelement <128 x i8> %53, i8 %54, i32 27 +  %56 = extractelement <16 x i8> %in, i64 12 +  %57 = insertelement <128 x i8> %55, i8 %56, i32 28 +  %58 = extractelement <16 x i8> %in, i64 13 +  %59 = insertelement <128 x i8> %57, i8 %58, i32 29 +  %60 = extractelement <16 x i8> %in, i64 14 +  %61 = insertelement <128 x i8> %59, i8 %60, i32 30 +  %62 = extractelement <16 x i8> %in, i64 15 +  %63 = insertelement <128 x i8> %61, i8 %62, i32 31 +  %64 = extractelement <16 x i8> %in, i64 0 +  %65 = insertelement <128 x i8> %63, i8 %64, i32 32 +  %66 = extractelement <16 x i8> %in, i64 1 +  %67 = insertelement <128 x i8> %65, i8 %66, i32 33 +  %68 = extractelement <16 x i8> %in, i64 2 +  %69 = insertelement <128 x i8> %67, i8 %68, i32 34 +  %70 = extractelement <16 x i8> %in, i64 3 +  %71 = insertelement <128 x i8> %69, i8 %70, i32 35 +  %72 = extractelement <16 x i8> %in, i64 4 +  %73 = insertelement <128 x i8> %71, i8 %72, i32 36 +  %74 = extractelement <16 x i8> %in, i64 5 +  %75 = insertelement <128 x i8> %73, i8 %74, i32 37 +  %76 = extractelement <16 x i8> %in, i64 6 +  %77 = insertelement <128 x i8> %75, i8 %76, i32 38 +  %78 = extractelement <16 x i8> %in, i64 7 +  %79 = insertelement <128 x i8> %77, i8 %78, i32 39 +  %80 = extractelement <16 x i8> %in, i64 8 +  %81 = insertelement <128 x i8> %79, i8 %80, i32 40 +  %82 = extractelement <16 x i8> %in, i64 9 +  %83 = insertelement <128 x i8> %81, i8 %82, i32 41 +  %84 = extractelement <16 x i8> %in, i64 10 +  %85 = insertelement <128 x i8> %83, i8 %84, i32 42 +  %86 = extractelement <16 x i8> %in, i64 11 +  %87 = insertelement <128 x i8> %85, i8 %86, i32 43 +  %88 = extractelement <16 x i8> %in, i64 12 +  %89 = insertelement <128 x i8> %87, i8 %88, i32 44 +  %90 = extractelement <16 x i8> %in, i64 13 +  %91 = insertelement <128 x i8> %89, i8 %90, i32 45 +  %92 = extractelement <16 x i8> %in, i64 14 +  %93 = insertelement <128 x i8> %91, i8 %92, i32 46 +  %94 = extractelement <16 x i8> %in, i64 15 +  %95 = insertelement <128 x i8> %93, i8 %94, i32 47 +  %96 = extractelement <16 x i8> %in, i64 0 +  %97 = insertelement <128 x i8> %95, i8 %96, i32 48 +  %98 = extractelement <16 x i8> %in, i64 1 +  %99 = insertelement <128 x i8> %97, i8 %98, i32 49 +  %100 = extractelement <16 x i8> %in, i64 2 +  %101 = insertelement <128 x i8> %99, i8 %100, i32 50 +  %102 = extractelement <16 x i8> %in, i64 3 +  %103 = insertelement <128 x i8> %101, i8 %102, i32 51 +  %104 = extractelement <16 x i8> %in, i64 4 +  %105 = insertelement <128 x i8> %103, i8 %104, i32 52 +  %106 = extractelement <16 x i8> %in, i64 5 +  %107 = insertelement <128 x i8> %105, i8 %106, i32 53 +  %108 = extractelement <16 x i8> %in, i64 6 +  %109 = insertelement <128 x i8> %107, i8 %108, i32 54 +  %110 
= extractelement <16 x i8> %in, i64 7 +  %111 = insertelement <128 x i8> %109, i8 %110, i32 55 +  %112 = extractelement <16 x i8> %in, i64 8 +  %113 = insertelement <128 x i8> %111, i8 %112, i32 56 +  %114 = extractelement <16 x i8> %in, i64 9 +  %115 = insertelement <128 x i8> %113, i8 %114, i32 57 +  %116 = extractelement <16 x i8> %in, i64 10 +  %117 = insertelement <128 x i8> %115, i8 %116, i32 58 +  %118 = extractelement <16 x i8> %in, i64 11 +  %119 = insertelement <128 x i8> %117, i8 %118, i32 59 +  %120 = extractelement <16 x i8> %in, i64 12 +  %121 = insertelement <128 x i8> %119, i8 %120, i32 60 +  %122 = extractelement <16 x i8> %in, i64 13 +  %123 = insertelement <128 x i8> %121, i8 %122, i32 61 +  %124 = extractelement <16 x i8> %in, i64 14 +  %125 = insertelement <128 x i8> %123, i8 %124, i32 62 +  %126 = extractelement <16 x i8> %in, i64 15 +  %127 = insertelement <128 x i8> %125, i8 %126, i32 63 +  %128 = extractelement <16 x i8> %in, i64 0 +  %129 = insertelement <128 x i8> %127, i8 %128, i32 64 +  %130 = extractelement <16 x i8> %in, i64 1 +  %131 = insertelement <128 x i8> %129, i8 %130, i32 65 +  %132 = extractelement <16 x i8> %in, i64 2 +  %133 = insertelement <128 x i8> %131, i8 %132, i32 66 +  %134 = extractelement <16 x i8> %in, i64 3 +  %135 = insertelement <128 x i8> %133, i8 %134, i32 67 +  %136 = extractelement <16 x i8> %in, i64 4 +  %137 = insertelement <128 x i8> %135, i8 %136, i32 68 +  %138 = extractelement <16 x i8> %in, i64 5 +  %139 = insertelement <128 x i8> %137, i8 %138, i32 69 +  %140 = extractelement <16 x i8> %in, i64 6 +  %141 = insertelement <128 x i8> %139, i8 %140, i32 70 +  %142 = extractelement <16 x i8> %in, i64 7 +  %143 = insertelement <128 x i8> %141, i8 %142, i32 71 +  %144 = extractelement <16 x i8> %in, i64 8 +  %145 = insertelement <128 x i8> %143, i8 %144, i32 72 +  %146 = extractelement <16 x i8> %in, i64 9 +  %147 = insertelement <128 x i8> %145, i8 %146, i32 73 +  %148 = extractelement <16 x i8> %in, i64 10 +  %149 = insertelement <128 x i8> %147, i8 %148, i32 74 +  %150 = extractelement <16 x i8> %in, i64 11 +  %151 = insertelement <128 x i8> %149, i8 %150, i32 75 +  %152 = extractelement <16 x i8> %in, i64 12 +  %153 = insertelement <128 x i8> %151, i8 %152, i32 76 +  %154 = extractelement <16 x i8> %in, i64 13 +  %155 = insertelement <128 x i8> %153, i8 %154, i32 77 +  %156 = extractelement <16 x i8> %in, i64 14 +  %157 = insertelement <128 x i8> %155, i8 %156, i32 78 +  %158 = extractelement <16 x i8> %in, i64 15 +  %159 = insertelement <128 x i8> %157, i8 %158, i32 79 +  %160 = extractelement <16 x i8> %in, i64 0 +  %161 = insertelement <128 x i8> %159, i8 %160, i32 80 +  %162 = extractelement <16 x i8> %in, i64 1 +  %163 = insertelement <128 x i8> %161, i8 %162, i32 81 +  %164 = extractelement <16 x i8> %in, i64 2 +  %165 = insertelement <128 x i8> %163, i8 %164, i32 82 +  %166 = extractelement <16 x i8> %in, i64 3 +  %167 = insertelement <128 x i8> %165, i8 %166, i32 83 +  %168 = extractelement <16 x i8> %in, i64 4 +  %169 = insertelement <128 x i8> %167, i8 %168, i32 84 +  %170 = extractelement <16 x i8> %in, i64 5 +  %171 = insertelement <128 x i8> %169, i8 %170, i32 85 +  %172 = extractelement <16 x i8> %in, i64 6 +  %173 = insertelement <128 x i8> %171, i8 %172, i32 86 +  %174 = extractelement <16 x i8> %in, i64 7 +  %175 = insertelement <128 x i8> %173, i8 %174, i32 87 +  %176 = extractelement <16 x i8> %in, i64 8 +  %177 = insertelement <128 x i8> %175, i8 %176, i32 88 +  %178 = extractelement <16 x i8> %in, i64 9 +  
%179 = insertelement <128 x i8> %177, i8 %178, i32 89 +  %180 = extractelement <16 x i8> %in, i64 10 +  %181 = insertelement <128 x i8> %179, i8 %180, i32 90 +  %182 = extractelement <16 x i8> %in, i64 11 +  %183 = insertelement <128 x i8> %181, i8 %182, i32 91 +  %184 = extractelement <16 x i8> %in, i64 12 +  %185 = insertelement <128 x i8> %183, i8 %184, i32 92 +  %186 = extractelement <16 x i8> %in, i64 13 +  %187 = insertelement <128 x i8> %185, i8 %186, i32 93 +  %188 = extractelement <16 x i8> %in, i64 14 +  %189 = insertelement <128 x i8> %187, i8 %188, i32 94 +  %190 = extractelement <16 x i8> %in, i64 15 +  %191 = insertelement <128 x i8> %189, i8 %190, i32 95 +  %192 = extractelement <16 x i8> %in, i64 0 +  %193 = insertelement <128 x i8> %191, i8 %192, i32 96 +  %194 = extractelement <16 x i8> %in, i64 1 +  %195 = insertelement <128 x i8> %193, i8 %194, i32 97 +  %196 = extractelement <16 x i8> %in, i64 2 +  %197 = insertelement <128 x i8> %195, i8 %196, i32 98 +  %198 = extractelement <16 x i8> %in, i64 3 +  %199 = insertelement <128 x i8> %197, i8 %198, i32 99 +  %200 = extractelement <16 x i8> %in, i64 4 +  %201 = insertelement <128 x i8> %199, i8 %200, i32 100 +  %202 = extractelement <16 x i8> %in, i64 5 +  %203 = insertelement <128 x i8> %201, i8 %202, i32 101 +  %204 = extractelement <16 x i8> %in, i64 6 +  %205 = insertelement <128 x i8> %203, i8 %204, i32 102 +  %206 = extractelement <16 x i8> %in, i64 7 +  %207 = insertelement <128 x i8> %205, i8 %206, i32 103 +  %208 = extractelement <16 x i8> %in, i64 8 +  %209 = insertelement <128 x i8> %207, i8 %208, i32 104 +  %210 = extractelement <16 x i8> %in, i64 9 +  %211 = insertelement <128 x i8> %209, i8 %210, i32 105 +  %212 = extractelement <16 x i8> %in, i64 10 +  %213 = insertelement <128 x i8> %211, i8 %212, i32 106 +  %214 = extractelement <16 x i8> %in, i64 11 +  %215 = insertelement <128 x i8> %213, i8 %214, i32 107 +  %216 = extractelement <16 x i8> %in, i64 12 +  %217 = insertelement <128 x i8> %215, i8 %216, i32 108 +  %218 = extractelement <16 x i8> %in, i64 13 +  %219 = insertelement <128 x i8> %217, i8 %218, i32 109 +  %220 = extractelement <16 x i8> %in, i64 14 +  %221 = insertelement <128 x i8> %219, i8 %220, i32 110 +  %222 = extractelement <16 x i8> %in, i64 15 +  %223 = insertelement <128 x i8> %221, i8 %222, i32 111 +  %224 = extractelement <16 x i8> %in, i64 0 +  %225 = insertelement <128 x i8> %223, i8 %224, i32 112 +  %226 = extractelement <16 x i8> %in, i64 1 +  %227 = insertelement <128 x i8> %225, i8 %226, i32 113 +  %228 = extractelement <16 x i8> %in, i64 2 +  %229 = insertelement <128 x i8> %227, i8 %228, i32 114 +  %230 = extractelement <16 x i8> %in, i64 3 +  %231 = insertelement <128 x i8> %229, i8 %230, i32 115 +  %232 = extractelement <16 x i8> %in, i64 4 +  %233 = insertelement <128 x i8> %231, i8 %232, i32 116 +  %234 = extractelement <16 x i8> %in, i64 5 +  %235 = insertelement <128 x i8> %233, i8 %234, i32 117 +  %236 = extractelement <16 x i8> %in, i64 6 +  %237 = insertelement <128 x i8> %235, i8 %236, i32 118 +  %238 = extractelement <16 x i8> %in, i64 7 +  %239 = insertelement <128 x i8> %237, i8 %238, i32 119 +  %240 = extractelement <16 x i8> %in, i64 8 +  %241 = insertelement <128 x i8> %239, i8 %240, i32 120 +  %242 = extractelement <16 x i8> %in, i64 9 +  %243 = insertelement <128 x i8> %241, i8 %242, i32 121 +  %244 = extractelement <16 x i8> %in, i64 10 +  %245 = insertelement <128 x i8> %243, i8 %244, i32 122 +  %246 = extractelement <16 x i8> %in, i64 11 +  %247 = 
insertelement <128 x i8> %245, i8 %246, i32 123
+  %248 = extractelement <16 x i8> %in, i64 12
+  %249 = insertelement <128 x i8> %247, i8 %248, i32 124
+  %250 = extractelement <16 x i8> %in, i64 13
+  %251 = insertelement <128 x i8> %249, i8 %250, i32 125
+  %252 = extractelement <16 x i8> %in, i64 14
+  %253 = insertelement <128 x i8> %251, i8 %252, i32 126
+  %254 = extractelement <16 x i8> %in, i64 15
+  %255 = insertelement <128 x i8> %253, i8 %254, i32 127
+  %256 = insertelement <16 x i8> poison, i8 %160, i64 0
+  %257 = insertelement <16 x i8> %256, i8 %162, i64 1
+  %258 = insertelement <16 x i8> %257, i8 %164, i64 2
+  %259 = insertelement <16 x i8> %258, i8 %166, i64 3
+  %260 = insertelement <16 x i8> %259, i8 %168, i64 4
+  %261 = insertelement <16 x i8> %260, i8 %170, i64 5
+  %262 = insertelement <16 x i8> %261, i8 %172, i64 6
+  %263 = insertelement <16 x i8> %262, i8 %174, i64 7
+  %264 = insertelement <16 x i8> %263, i8 %176, i64 8
+  %265 = insertelement <16 x i8> %264, i8 %178, i64 9
+  %266 = insertelement <16 x i8> %265, i8 %180, i64 10
+  %267 = insertelement <16 x i8> %266, i8 %182, i64 11
+  %268 = insertelement <16 x i8> %267, i8 %184, i64 12
+  %269 = insertelement <16 x i8> %268, i8 %186, i64 13
+  %270 = insertelement <16 x i8> %269, i8 %188, i64 14
+  %271 = insertelement <16 x i8> %270, i8 %190, i64 15
+  %sum = add <16 x i8> %271, %add
+  store <16 x i8> %sum, ptr addrspace(3) %out, align 16
+  ret void
+}
+
+attributes #0 = { "amdgpu-waves-per-eu"="2,2" }
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 11a5a57..cadf781 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -57,8 +57,13 @@ if config.enable_profcheck:
     # so we just exclude llvm-reduce tests from this config altogether. This should
     # be fine though as profcheck config tests are mostly concerned with opt.
     config.excludes.append("llvm-reduce")
+    # Exclude llvm-objcopy tests - not the target of this effort, and some use
+    # cat in ways that conflict with how profcheck uses it.
+    config.excludes.append("llvm-objcopy")
     # (Issue #161235) Temporarily exclude LoopVectorize.
     config.excludes.append("LoopVectorize")
+    # exclude UpdateTestChecks - they fail because of inserted prof annotations
+    config.excludes.append("UpdateTestChecks")

 # test_source_root: The root path where tests are located.
 config.test_source_root = os.path.dirname(__file__)
@@ -474,7 +479,7 @@ if config.host_ldflags.find("-m32") < 0 and any(
 config.available_features.add("host-byteorder-" + sys.byteorder + "-endian")
 if config.target_triple:
     if re.match(
-        r"(aarch64_be|arc|armeb|bpfeb|lanai|m68k|mips|mips64|powerpc|powerpc64|sparc|sparcv9|s390x|s390|tce|thumbeb)-.*",
+        r"(aarch64_be|arc|armeb|bpfeb|lanai|m68k|mips|mips64|powerpc|powerpc64|sparc|sparcv9|sparc64|s390x|s390|tce|thumbeb)-.*",
         config.target_triple,
     ):
         config.available_features.add("target-byteorder-big-endian")
diff --git a/llvm/test/tools/dsymutil/ARM/swiftmodule-include-from-interface.test b/llvm/test/tools/dsymutil/ARM/swiftmodule-include-from-interface.test
new file mode 100644
index 0000000..00141f12
--- /dev/null
+++ b/llvm/test/tools/dsymutil/ARM/swiftmodule-include-from-interface.test
@@ -0,0 +1,33 @@
+# RUN: dsymutil -include-swiftmodules-from-interface -verbose -oso-prepend-path=%p -y -o %t.dSYM %s | FileCheck %s
+#
+# RUN: dsymutil -include-swiftmodules-from-interface --linker parallel -verbose -oso-prepend-path=%p -y %s -o %t-parallel.dSYM | FileCheck %s
+#
+# To regenerate:
+# echo ''>I.swift
+# echo ''>B.swift
+# echo 'import I'>main.swift
+# xcrun swiftc -emit-module-interface-path I.swiftinterface -enable-library-evolution I.swift
+# xcrun swiftc -emit-module-path B.swiftmodule B.swift -Xfrontend -no-serialize-debugging-options
+# xcrun swiftc -explicit-module-build main.swift -I. -module-cache-path cache -g -Xfrontend -no-serialize-debugging-options
+# output is "B.swiftmodule" and "cache/I*.swiftmodule"
+#
+# CHECK-NOT: Skipping compiled textual Swift interface: {{.*}}/Inputs/Binary.swiftmodule
+# CHECK-NOT: Skipping compiled textual Swift interface: {{.*}}/Inputs/FromInterface.swiftmodule
+
+#
+---
+triple:          'arm64-apple-darwin'
+objects:
+  - filename:        '../Inputs/Binary.swiftmodule'
+    timestamp:       0
+    type:            50
+    symbols:         []
+  - filename:        '../Inputs/FromInterface.swiftmodule'
+    timestamp:       0
+    type:            50
+    symbols:         []
+  - filename:        '../Inputs/FromInterface.swiftmodule'
+    timestamp:       0
+    type:            50
+    symbols:         []
+...
diff --git a/llvm/test/tools/dsymutil/cmdline.test b/llvm/test/tools/dsymutil/cmdline.test
index 1574fe3..0b0bce1 100644
--- a/llvm/test/tools/dsymutil/cmdline.test
+++ b/llvm/test/tools/dsymutil/cmdline.test
@@ -14,6 +14,7 @@ CHECK: -fat64
 CHECK: -flat
 CHECK: -gen-reproducer
 CHECK: -help
+CHECK: -include-swiftmodules-from-interface
 CHECK: -keep-function-for-static
 CHECK: -no-object-timestamp
 CHECK: -no-odr
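
Editor's note on the first lit.cfg.py hunk above: config.excludes is a plain list of file and directory names that lit skips during test discovery, so appending "llvm-objcopy" and "UpdateTestChecks" under the profcheck configuration drops those whole subtrees from the run. The Python below is only a self-contained sketch of that effect under those assumptions; PROFCHECK_EXCLUDES, is_excluded, discover, and the sample paths are hypothetical and are not lit's real discovery code.

# Hypothetical illustration of the exclusion behaviour extended by the hunk
# above; the list restates the names appended under `if config.enable_profcheck:`
# and the discovery logic is deliberately simplified.
PROFCHECK_EXCLUDES = ["llvm-reduce", "llvm-objcopy", "LoopVectorize", "UpdateTestChecks"]

def is_excluded(test_path: str) -> bool:
    # Skip a test when any component of its path is named in the excludes list.
    return any(part in PROFCHECK_EXCLUDES for part in test_path.split("/"))

def discover(test_paths):
    # Keep only tests that survive the exclusion filter.
    return [p for p in test_paths if not is_excluded(p)]

if __name__ == "__main__":
    # Illustrative paths, not taken from this patch.
    sample = [
        "tools/llvm-objcopy/ELF/basic-copy.test",                 # skipped under profcheck
        "tools/UpdateTestChecks/update_test_checks/basic.test",   # skipped under profcheck
        "tools/dsymutil/cmdline.test",                            # still run
    ]
    print(discover(sample))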
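
Editor's note on the target-byteorder hunk: the regex gains a sparc64 alternative, so sparc64 triples are now classified as big-endian alongside sparc and sparcv9. The quick standalone check below copies the post-patch pattern from the "+" line of that hunk; the two sample triples are illustrative and not taken from the patch.

import re

# Post-patch pattern from llvm/test/lit.cfg.py; `sparc64` is the newly added
# alternative.
BIG_ENDIAN_TRIPLE_RE = (
    r"(aarch64_be|arc|armeb|bpfeb|lanai|m68k|mips|mips64|powerpc|powerpc64|"
    r"sparc|sparcv9|sparc64|s390x|s390|tce|thumbeb)-.*"
)

for triple in ("sparc64-unknown-linux-gnu", "x86_64-unknown-linux-gnu"):
    # `sparc` alone cannot match `sparc64-...` because a dash must follow the
    # alternative, so the explicit `sparc64` entry is what makes this match.
    feature = "big" if re.match(BIG_ENDIAN_TRIPLE_RE, triple) else "little"
    print(f"{triple} -> target-byteorder-{feature}-endian")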
