Diffstat (limited to 'llvm/test')
30 files changed, 6580 insertions, 594 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index 6b84a84..1950e60 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -1440,3 +1440,50 @@ body:             |
     %freeze:_(<4 x s32>) = G_FREEZE %extract
     $q0 = COPY %freeze(<4 x s32>)
     RET_ReallyLR implicit $x0
+...
+---
+name:            ubfx_does_not_generate_poison
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: ubfx_does_not_generate_poison
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: %c1:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[FREEZE]], %c1(s64), %c1
+    ; CHECK-NEXT: $x0 = COPY [[UBFX]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %c1:_(s64) = G_CONSTANT i64 1
+    %1:_(s64) = G_UBFX %0, %c1, %c1
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            sbfx_does_not_generate_poison
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: sbfx_does_not_generate_poison
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: %c1:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: [[SBFX:%[0-9]+]]:_(s64) = G_SBFX [[FREEZE]], %c1(s64), %c1
+    ; CHECK-NEXT: $x0 = COPY [[SBFX]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %c1:_(s64) = G_CONSTANT i64 1
+    %1:_(s64) = G_SBFX %0, %c1, %c1
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll index f1dcb2a..21da864 100644 --- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll +++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll @@ -215,6 +215,133 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) nounwind {    ret { <2 x half>, <2 x half> } %result  } +define { <3 x half>, <3 x half> } @test_sincos_v3f16(<3 x half> %a) nounwind { +; CHECK-LABEL: test_sincos_v3f16: +; CHECK:       // %bb.0: +; CHECK-NEXT:    sub sp, sp, #64 +; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT:    mov h1, v0.h[1] +; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT:    add x0, sp, #36 +; CHECK-NEXT:    add x1, sp, #32 +; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT:    fcvt s0, h1 +; CHECK-NEXT:    bl sincosf +; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT:    add x0, sp, #28 +; CHECK-NEXT:    add x1, sp, #24 +; CHECK-NEXT:    fcvt s0, h0 +; CHECK-NEXT:    bl sincosf +; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT:    add x0, sp, #44 +; CHECK-NEXT:    add x1, sp, #40 +; CHECK-NEXT:    mov h0, v0.h[2] +; CHECK-NEXT:    fcvt s0, h0 +; CHECK-NEXT:    bl sincosf +; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT:    add x0, sp, #60 +; CHECK-NEXT:    add x1, sp, #56 +; CHECK-NEXT:    mov h0, v0.h[3] +; CHECK-NEXT:    fcvt s0, h0 +; CHECK-NEXT:    bl sincosf +; CHECK-NEXT:    ldp s2, s0, [sp, #32] +; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT:    ldp s3, s1, [sp, #24] +; CHECK-NEXT:    fcvt h4, s0 +; CHECK-NEXT:    fcvt h2, s2 +; CHECK-NEXT:    fcvt h0, s1 +; CHECK-NEXT:    fcvt h1, s3 +; CHECK-NEXT:    ldp s5, s3, [sp, #40] +; CHECK-NEXT:    fcvt h3, s3 +; CHECK-NEXT:    mov v0.h[1], v4.h[0] +; CHECK-NEXT:    fcvt h4, s5 +; CHECK-NEXT:    mov v1.h[1], v2.h[0] +; CHECK-NEXT:    ldp s5, s2, [sp, #56] +; CHECK-NEXT:    mov v0.h[2], v3.h[0] +; CHECK-NEXT:    fcvt h2, s2 +; CHECK-NEXT:    fcvt h3, s5 +; CHECK-NEXT:    mov v1.h[2], v4.h[0] +; CHECK-NEXT:    mov v0.h[3], v2.h[0] +; CHECK-NEXT:    mov v1.h[3], v3.h[0] +; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT:    add sp, sp, #64 +; CHECK-NEXT:    ret +; +; NO-LIBCALL-LABEL: test_sincos_v3f16: +; NO-LIBCALL:       // %bb.0: +; NO-LIBCALL-NEXT:    sub sp, sp, #80 +; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT:    mov h1, v0.h[1] +; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT:    fcvt s8, h1 +; NO-LIBCALL-NEXT:    fmov s0, s8 +; NO-LIBCALL-NEXT:    bl sinf +; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    fcvt h0, s0 +; NO-LIBCALL-NEXT:    fcvt s9, h1 +; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    fmov s0, s9 +; NO-LIBCALL-NEXT:    bl sinf +; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    fcvt h0, s0 +; NO-LIBCALL-NEXT:    mov h1, v1.h[2] +; NO-LIBCALL-NEXT:    fcvt s10, h1 +; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    mov v0.h[1], v1.h[0] +; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill +; 
NO-LIBCALL-NEXT:    fmov s0, s10 +; NO-LIBCALL-NEXT:    bl sinf +; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    fcvt h0, s0 +; NO-LIBCALL-NEXT:    mov h1, v1.h[3] +; NO-LIBCALL-NEXT:    fcvt s11, h1 +; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    mov v1.h[2], v0.h[0] +; NO-LIBCALL-NEXT:    fmov s0, s11 +; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    bl sinf +; NO-LIBCALL-NEXT:    fcvt h0, s0 +; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    mov v1.h[3], v0.h[0] +; NO-LIBCALL-NEXT:    fmov s0, s8 +; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    bl cosf +; NO-LIBCALL-NEXT:    fcvt h0, s0 +; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    fmov s0, s9 +; NO-LIBCALL-NEXT:    bl cosf +; NO-LIBCALL-NEXT:    fcvt h0, s0 +; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    mov v0.h[1], v1.h[0] +; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    fmov s0, s10 +; NO-LIBCALL-NEXT:    bl cosf +; NO-LIBCALL-NEXT:    fcvt h0, s0 +; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    mov v1.h[2], v0.h[0] +; NO-LIBCALL-NEXT:    fmov s0, s11 +; NO-LIBCALL-NEXT:    str q1, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    bl cosf +; NO-LIBCALL-NEXT:    fmov s1, s0 +; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT:    fcvt h2, s1 +; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    mov v1.h[3], v2.h[0] +; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1 +; NO-LIBCALL-NEXT:    add sp, sp, #80 +; NO-LIBCALL-NEXT:    ret +  %result = call { <3 x half>, <3 x half> } @llvm.sincos.v3f16(<3 x half> %a) +  ret { <3 x half>, <3 x half> } %result +} +  define { float, float } @test_sincos_f32(float %a) nounwind {  ; CHECK-LABEL: test_sincos_f32:  ; CHECK:       // %bb.0: @@ -493,3 +620,71 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) nounwi    %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)    ret { <2 x double>, <2 x double> } %result  } + +define { <3 x double>, <3 x double> } @test_sincos_v3f64(<3 x double> %a) nounwind { +; CHECK-LABEL: test_sincos_v3f64: +; CHECK:       // %bb.0: +; CHECK-NEXT:    sub sp, sp, #80 +; CHECK-NEXT:    add x0, sp, #16 +; CHECK-NEXT:    add x1, sp, #8 +; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT:    fmov d8, d2 +; CHECK-NEXT:    fmov d9, d1 +; CHECK-NEXT:    bl sincos +; CHECK-NEXT:    fmov d0, d9 +; CHECK-NEXT:    add x0, sp, #32 +; CHECK-NEXT:    add x1, sp, #24 +; CHECK-NEXT:    bl sincos +; CHECK-NEXT:    fmov d0, d8 +; CHECK-NEXT:    add x0, sp, #72 +; CHECK-NEXT:    add x1, sp, #40 +; CHECK-NEXT:    bl sincos +; CHECK-NEXT:    ldp d3, d0, [sp, #8] +; CHECK-NEXT:    ldr d2, [sp, #72] +; CHECK-NEXT:    ldp d4, d1, [sp, #24] +; CHECK-NEXT:    ldr d5, [sp, #40] +; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded 
Reload +; CHECK-NEXT:    add sp, sp, #80 +; CHECK-NEXT:    ret +; +; NO-LIBCALL-LABEL: test_sincos_v3f64: +; NO-LIBCALL:       // %bb.0: +; NO-LIBCALL-NEXT:    stp d13, d12, [sp, #-64]! // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    stp d11, d10, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    fmov d10, d0 +; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT:    fmov d8, d2 +; NO-LIBCALL-NEXT:    fmov d9, d1 +; NO-LIBCALL-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT:    bl sin +; NO-LIBCALL-NEXT:    fmov d11, d0 +; NO-LIBCALL-NEXT:    fmov d0, d9 +; NO-LIBCALL-NEXT:    bl sin +; NO-LIBCALL-NEXT:    fmov d12, d0 +; NO-LIBCALL-NEXT:    fmov d0, d8 +; NO-LIBCALL-NEXT:    bl sin +; NO-LIBCALL-NEXT:    fmov d13, d0 +; NO-LIBCALL-NEXT:    fmov d0, d10 +; NO-LIBCALL-NEXT:    bl cos +; NO-LIBCALL-NEXT:    fmov d10, d0 +; NO-LIBCALL-NEXT:    fmov d0, d9 +; NO-LIBCALL-NEXT:    bl cos +; NO-LIBCALL-NEXT:    fmov d9, d0 +; NO-LIBCALL-NEXT:    fmov d0, d8 +; NO-LIBCALL-NEXT:    bl cos +; NO-LIBCALL-NEXT:    fmov d5, d0 +; NO-LIBCALL-NEXT:    fmov d0, d11 +; NO-LIBCALL-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT:    fmov d3, d10 +; NO-LIBCALL-NEXT:    fmov d4, d9 +; NO-LIBCALL-NEXT:    fmov d1, d12 +; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    fmov d2, d13 +; NO-LIBCALL-NEXT:    ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    ldp d13, d12, [sp], #64 // 16-byte Folded Reload +; NO-LIBCALL-NEXT:    ret +  %result = call { <3 x double>, <3 x double> } @llvm.sincos.v3f64(<3 x double> %a) +  ret { <3 x double>, <3 x double> } %result +} diff --git a/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll new file mode 100644 index 0000000..1491729 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll @@ -0,0 +1,18 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s + +define void @f5(<64 x i32> %a0, ptr %a1) { +; CHECK-LABEL: f5: +; CHECK: [[REG0:(r[0-9]+)]] = ##16843009 +; CHECK-DAG: q[[Q0:[0-9]+]] = vand(v{{[0-9]+}},[[REG0]]) +; CHECK-DAG: q[[Q1:[0-9]+]] = vand(v{{[0-9]+}},[[REG0]]) +; CHECK: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK: v[[VROR:[0-9]+]] = vror(v{{[0-9]+}},r{{[0-9]+}}) +; CHECK: v[[VOR:[0-9]+]] = vor(v[[VROR]],v{{[0-9]+}}) +; CHECK: q{{[0-9]+}} = vand(v[[VOR]],r{{[0-9]+}}) +b0: +  %v0 = trunc <64 x i32> %a0 to <64 x i1> +  store <64 x i1> %v0, ptr %a1, align 1 +  ret void +} + diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll index 93fcd42..e02a2e7 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll @@ -12,8 +12,8 @@ define float @flog2_s(float %x) nounwind {  ;  ; LA64-LABEL: flog2_s:  ; LA64:       # %bb.0: -; LA64-NEXT:    pcaddu18i $t8, %call36(log2f) -; LA64-NEXT:    jr $t8 +; LA64-NEXT:    flogb.s $fa0, $fa0 +; LA64-NEXT:    ret    %y = call float @llvm.log2.f32(float %x)    ret float %y  } @@ -25,8 +25,8 @@ define double @flog2_d(double %x) nounwind {  ;  ; LA64-LABEL: flog2_d:  ; LA64:       # %bb.0: -; LA64-NEXT:    pcaddu18i $t8, %call36(log2) -; LA64-NEXT:    jr $t8 +; LA64-NEXT:    flogb.d $fa0, $fa0 +; LA64-NEXT:    ret    %y = call double @llvm.log2.f64(double %x)    ret double %y  } diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll new file mode 100644 index 0000000..79407c3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll @@ -0,0 +1,308 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +;; ceilf +define void @ceil_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ceil_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrp.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrp.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrp.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.ceil.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; ceil +define void @ceil_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ceil_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrp.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrp.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.ceil.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +;; floorf +define void @floor_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: floor_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; 
CHECK-NEXT:    vfrintrm.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrm.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrm.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.floor.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; floor +define void @floor_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: floor_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrm.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrm.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.floor.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +;; truncf +define void @trunc_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: trunc_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrz.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrz.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; 
CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrz.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.trunc.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; trunc +define void @trunc_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: trunc_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrz.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrz.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.trunc.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +;; roundevenf +define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: roundeven_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrne.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrne.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrne.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; roundeven +define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: roundeven_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; 
CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrne.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrne.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) +declare <8 x float> @llvm.floor.v8f32(<8 x float>) +declare <4 x double> @llvm.floor.v4f64(<4 x double>) +declare <8 x float> @llvm.trunc.v8f32(<8 x float>) +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) +declare <8 x float> @llvm.roundeven.v8f32(<8 x float>) +declare <4 x double> @llvm.roundeven.v4f64(<4 x double>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll index 2a5a8fa..5c5c199 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll @@ -1,14 +1,13 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64  define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK-LABEL: xvavg_b:  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvavg.b $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -25,8 +24,7 @@ define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvavg.h $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -43,8 +41,7 @@ define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvavg.w $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -57,14 +54,22 @@ entry:  }  define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavg_d: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a1, 0 -; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1 -; CHECK-NEXT:    xvst $xr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: xvavg_d: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvsrai.d $xr0, $xr0, 1 
+; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavg_d: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavg.d $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <4 x i64>, ptr %a    %vb = load <4 x i64>, ptr %b @@ -79,8 +84,7 @@ define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvavg.bu $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -97,8 +101,7 @@ define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvavg.hu $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -115,8 +118,7 @@ define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvavg.wu $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -129,14 +131,22 @@ entry:  }  define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavg_du: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a1, 0 -; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1 -; CHECK-NEXT:    xvst $xr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: xvavg_du: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavg_du: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavg.du $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <4 x i64>, ptr %a    %vb = load <4 x i64>, ptr %b @@ -151,9 +161,7 @@ define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.bu $xr0, $xr0, 1 -; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvavgr.b $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -171,9 +179,7 @@ define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.hu $xr0, $xr0, 1 -; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvavgr.h $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -191,9 +197,7 @@ define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.wu $xr0, $xr0, 1 -; CHECK-NEXT:    
xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvavgr.w $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -207,15 +211,23 @@ entry:  }  define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavgr_d: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a1, 0 -; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.du $xr0, $xr0, 1 -; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1 -; CHECK-NEXT:    xvst $xr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: xvavgr_d: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT:    xvsrai.d $xr0, $xr0, 1 +; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavgr_d: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavgr.d $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <4 x i64>, ptr %a    %vb = load <4 x i64>, ptr %b @@ -231,9 +243,7 @@ define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.bu $xr0, $xr0, 1 -; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvavgr.bu $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -251,9 +261,7 @@ define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.hu $xr0, $xr0, 1 -; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvavgr.hu $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -271,9 +279,7 @@ define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    xvld $xr0, $a1, 0  ; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.wu $xr0, $xr0, 1 -; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvavgr.wu $xr0, $xr0, $xr1  ; CHECK-NEXT:    xvst $xr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -287,15 +293,23 @@ entry:  }  define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavgr_du: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    xvld $xr0, $a1, 0 -; CHECK-NEXT:    xvld $xr1, $a2, 0 -; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT:    xvaddi.du $xr0, $xr0, 1 -; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1 -; CHECK-NEXT:    xvst $xr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: xvavgr_du: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavgr_du: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavgr.du $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <4 x i64>, ptr %a    %vb = load <4 x i64>, ptr %b diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll 
b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll new file mode 100644 index 0000000..c82adcb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll @@ -0,0 +1,379 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = sext <32 x i8> %va to <32 x i16> +  %eb = sext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_h: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = sext <16 x i16> %va to <16 x i32> +  %eb = sext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = sext <8 x i32> %va to <8 x i64> +  %eb = sext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_d: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = sext 
<4 x i64> %va to <4 x i128> +  %eb = sext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} + +define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = zext <32 x i8> %va to <32 x i16> +  %eb = zext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = zext <16 x i16> %va to <16 x i32> +  %eb = zext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = zext <8 x i32> %va to <8 x i64> +  %eb = zext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_du: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = zext <4 x i64> %va to <4 x i128> +  %eb = zext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x 
i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} + +define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = sext <32 x i8> %va to <32 x i16> +  %eb = sext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_h: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = sext <16 x i16> %va to <16 x i32> +  %eb = sext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = sext <8 x i32> %va to <8 x i64> +  %eb = sext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_d: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, 
$a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = sext <4 x i64> %va to <4 x i128> +  %eb = sext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} + +define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = zext <32 x i8> %va to <32 x i16> +  %eb = zext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = zext <16 x i16> %va to <16 x i32> +  %eb = zext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = zext <8 x i32> %va to <8 x i64> +  %eb = zext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void 
@xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_du: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = zext <4 x i64> %va to <4 x i128> +  %eb = zext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll index 68f2e3a..6b5f575 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll @@ -1,166 +1,17 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s  declare <8 x float> @llvm.log2.v8f32(<8 x float>)  declare <4 x double> @llvm.log2.v4f64(<4 x double>)  define void @flog2_v8f32(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v8f32: -; LA32:       # %bb.0: # %entry -; LA32-NEXT:    addi.w $sp, $sp, -128 -; LA32-NEXT:    st.w $ra, $sp, 124 # 4-byte Folded Spill -; LA32-NEXT:    st.w $fp, $sp, 120 # 4-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $a1, 0 -; LA32-NEXT:    xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA32-NEXT:    move $fp, $a0 -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 5 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 4 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA32-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 6 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 7 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    
xvpickve.w $xr0, $xr0, 1 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 0 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA32-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 2 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA32-NEXT:    xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 3 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA32-NEXT:    xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT:    xvpermi.q $xr1, $xr0, 2 -; LA32-NEXT:    xvst $xr1, $fp, 0 -; LA32-NEXT:    ld.w $fp, $sp, 120 # 4-byte Folded Reload -; LA32-NEXT:    ld.w $ra, $sp, 124 # 4-byte Folded Reload -; LA32-NEXT:    addi.w $sp, $sp, 128 -; LA32-NEXT:    ret -; -; LA64-LABEL: flog2_v8f32: -; LA64:       # %bb.0: # %entry -; LA64-NEXT:    addi.d $sp, $sp, -128 -; LA64-NEXT:    st.d $ra, $sp, 120 # 8-byte Folded Spill -; LA64-NEXT:    st.d $fp, $sp, 112 # 8-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $a1, 0 -; LA64-NEXT:    xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA64-NEXT:    move $fp, $a0 -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 5 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 4 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA64-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 6 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA64-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 7 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded 
Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA64-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 1 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA64-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 2 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA64-NEXT:    xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 3 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA64-NEXT:    xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT:    xvpermi.q $xr1, $xr0, 2 -; LA64-NEXT:    xvst $xr1, $fp, 0 -; LA64-NEXT:    ld.d $fp, $sp, 112 # 8-byte Folded Reload -; LA64-NEXT:    ld.d $ra, $sp, 120 # 8-byte Folded Reload -; LA64-NEXT:    addi.d $sp, $sp, 128 -; LA64-NEXT:    ret +; CHECK-LABEL: flog2_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvflogb.s $xr0, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret  entry:    %v = load <8 x float>, ptr %a    %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v) @@ -169,93 +20,12 @@ entry:  }  define void @flog2_v4f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f64: -; LA32:       # %bb.0: # %entry -; LA32-NEXT:    addi.w $sp, $sp, -112 -; LA32-NEXT:    st.w $ra, $sp, 108 # 4-byte Folded Spill -; LA32-NEXT:    st.w $fp, $sp, 104 # 4-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $a1, 0 -; LA32-NEXT:    xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA32-NEXT:    move $fp, $a0 -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 3 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA32-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 1 
-; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 0 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA32-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA32-NEXT:    xvpermi.q $xr0, $xr1, 2 -; LA32-NEXT:    xvst $xr0, $fp, 0 -; LA32-NEXT:    ld.w $fp, $sp, 104 # 4-byte Folded Reload -; LA32-NEXT:    ld.w $ra, $sp, 108 # 4-byte Folded Reload -; LA32-NEXT:    addi.w $sp, $sp, 112 -; LA32-NEXT:    ret -; -; LA64-LABEL: flog2_v4f64: -; LA64:       # %bb.0: # %entry -; LA64-NEXT:    addi.d $sp, $sp, -112 -; LA64-NEXT:    st.d $ra, $sp, 104 # 8-byte Folded Spill -; LA64-NEXT:    st.d $fp, $sp, 96 # 8-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $a1, 0 -; LA64-NEXT:    xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA64-NEXT:    move $fp, $a0 -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 3 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 2 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA64-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 1 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA64-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA64-NEXT:    xvpermi.q $xr0, $xr1, 2 -; LA64-NEXT:    xvst $xr0, $fp, 0 -; LA64-NEXT:    ld.d $fp, $sp, 96 # 8-byte Folded Reload -; LA64-NEXT:    ld.d $ra, $sp, 104 # 8-byte Folded Reload -; LA64-NEXT:    addi.d $sp, $sp, 112 -; LA64-NEXT:    ret +; CHECK-LABEL: flog2_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvflogb.d $xr0, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret  entry:    %v = load <4 x double>, ptr %a    %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v) diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll new file mode 100644 index 0000000..1ca6290 --- /dev/null +++ 
b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll @@ -0,0 +1,212 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +;; ceilf +define void @ceil_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ceil_v4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 0 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrp.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrp.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr2, $vr0, 48 +; CHECK-NEXT:    vst $vr2, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x float>, ptr %a0 +  %r = call <4 x float> @llvm.ceil.v4f32(<4 x float> %v0) +  store <4 x float> %r, ptr %res +  ret void +} + +;; ceil +define void @ceil_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ceil_v2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.d $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.d $vr1, $vr1 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrp.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <2 x double>, ptr %a0 +  %r = call <2 x double> @llvm.ceil.v2f64(<2 x double> %v0) +  store <2 x double> %r, ptr %res +  ret void +} + +;; floorf +define void @floor_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: floor_v4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 0 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrm.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrm.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr2, $vr0, 48 +; CHECK-NEXT:    vst $vr2, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x float>, ptr %a0 +  %r = call <4 x float> @llvm.floor.v4f32(<4 x float> %v0) +  store <4 x float> %r, ptr %res +  ret void +} + +;; floor +define void @floor_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: floor_v2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.d $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.d $vr1, $vr1 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrm.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <2 x double>, 
ptr %a0 +  %r = call <2 x double> @llvm.floor.v2f64(<2 x double> %v0) +  store <2 x double> %r, ptr %res +  ret void +} + +;; truncf +define void @trunc_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: trunc_v4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 0 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrz.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrz.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr2, $vr0, 48 +; CHECK-NEXT:    vst $vr2, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x float>, ptr %a0 +  %r = call <4 x float> @llvm.trunc.v4f32(<4 x float> %v0) +  store <4 x float> %r, ptr %res +  ret void +} + +;; trunc +define void @trunc_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: trunc_v2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.d $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.d $vr1, $vr1 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrz.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <2 x double>, ptr %a0 +  %r = call <2 x double> @llvm.trunc.v2f64(<2 x double> %v0) +  store <2 x double> %r, ptr %res +  ret void +} + +;; roundevenf +define void @roundeven_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: roundeven_v4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vreplvei.w $vr2, $vr0, 0 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrne.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrne.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr2, $vr0, 48 +; CHECK-NEXT:    vst $vr2, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x float>, ptr %a0 +  %r = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %v0) +  store <4 x float> %r, ptr %res +  ret void +} + +;; roundeven +define void @roundeven_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: roundeven_v2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vreplvei.d $vr1, $vr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.d $vr1, $vr1 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrne.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <2 x double>, ptr %a0 +  %r = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %v0) +  store <2 x double> %r, ptr %res +  ret void +} + +declare <4 x float> 
@llvm.ceil.v4f32(<4 x float>) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) +declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll index 20b88984..334af22 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll @@ -1,14 +1,13 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64  define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK-LABEL: vavg_b:  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT:    vavg.b $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -25,8 +24,7 @@ define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT:    vavg.h $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -43,8 +41,7 @@ define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT:    vavg.w $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -57,14 +54,22 @@ entry:  }  define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavg_d: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a1, 0 -; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrai.d $vr0, $vr0, 1 -; CHECK-NEXT:    vst $vr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: vavg_d: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    vld $vr0, $a1, 0 +; LA32-NEXT:    vld $vr1, $a2, 0 +; LA32-NEXT:    vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT:    vsrai.d $vr0, $vr0, 1 +; LA32-NEXT:    vst $vr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: vavg_d: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    vld $vr0, $a1, 0 +; LA64-NEXT:    vld $vr1, $a2, 0 +; LA64-NEXT:    vavg.d $vr0, $vr0, $vr1 +; LA64-NEXT:    vst $vr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <2 x i64>, ptr %a    %vb = load <2 x i64>, ptr %b @@ -79,8 +84,7 @@ define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT:    vavg.bu $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -97,8 +101,7 @@ define 
void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT:    vavg.hu $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -115,8 +118,7 @@ define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT:    vavg.wu $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -129,14 +131,22 @@ entry:  }  define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavg_du: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a1, 0 -; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT:    vsrli.d $vr0, $vr0, 1 -; CHECK-NEXT:    vst $vr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: vavg_du: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    vld $vr0, $a1, 0 +; LA32-NEXT:    vld $vr1, $a2, 0 +; LA32-NEXT:    vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT:    vsrli.d $vr0, $vr0, 1 +; LA32-NEXT:    vst $vr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: vavg_du: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    vld $vr0, $a1, 0 +; LA64-NEXT:    vld $vr1, $a2, 0 +; LA64-NEXT:    vavg.du $vr0, $vr0, $vr1 +; LA64-NEXT:    vst $vr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <2 x i64>, ptr %a    %vb = load <2 x i64>, ptr %b @@ -151,9 +161,7 @@ define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.bu $vr0, $vr0, 1 -; CHECK-NEXT:    vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT:    vavgr.b $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -171,9 +179,7 @@ define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.hu $vr0, $vr0, 1 -; CHECK-NEXT:    vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT:    vavgr.h $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -191,9 +197,7 @@ define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.wu $vr0, $vr0, 1 -; CHECK-NEXT:    vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT:    vavgr.w $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -207,15 +211,23 @@ entry:  }  define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavgr_d: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a1, 0 -; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.du $vr0, $vr0, 1 -; CHECK-NEXT:    vsrai.d $vr0, $vr0, 1 -; CHECK-NEXT:    vst $vr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: vavgr_d: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    vld $vr0, $a1, 0 +; LA32-NEXT:    vld $vr1, $a2, 0 +; LA32-NEXT:    vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT:    vaddi.du $vr0, $vr0, 1 +; LA32-NEXT:    vsrai.d $vr0, $vr0, 1 +; LA32-NEXT:    vst $vr0, $a0, 0 +; 
LA32-NEXT:    ret +; +; LA64-LABEL: vavgr_d: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    vld $vr0, $a1, 0 +; LA64-NEXT:    vld $vr1, $a2, 0 +; LA64-NEXT:    vavgr.d $vr0, $vr0, $vr1 +; LA64-NEXT:    vst $vr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <2 x i64>, ptr %a    %vb = load <2 x i64>, ptr %b @@ -231,9 +243,7 @@ define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.bu $vr0, $vr0, 1 -; CHECK-NEXT:    vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT:    vavgr.bu $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -251,9 +261,7 @@ define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.hu $vr0, $vr0, 1 -; CHECK-NEXT:    vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT:    vavgr.hu $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -271,9 +279,7 @@ define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {  ; CHECK:       # %bb.0: # %entry  ; CHECK-NEXT:    vld $vr0, $a1, 0  ; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.wu $vr0, $vr0, 1 -; CHECK-NEXT:    vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT:    vavgr.wu $vr0, $vr0, $vr1  ; CHECK-NEXT:    vst $vr0, $a0, 0  ; CHECK-NEXT:    ret  entry: @@ -287,15 +293,23 @@ entry:  }  define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavgr_du: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vld $vr0, $a1, 0 -; CHECK-NEXT:    vld $vr1, $a2, 0 -; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT:    vaddi.du $vr0, $vr0, 1 -; CHECK-NEXT:    vsrli.d $vr0, $vr0, 1 -; CHECK-NEXT:    vst $vr0, $a0, 0 -; CHECK-NEXT:    ret +; LA32-LABEL: vavgr_du: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    vld $vr0, $a1, 0 +; LA32-NEXT:    vld $vr1, $a2, 0 +; LA32-NEXT:    vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT:    vaddi.du $vr0, $vr0, 1 +; LA32-NEXT:    vsrli.d $vr0, $vr0, 1 +; LA32-NEXT:    vst $vr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: vavgr_du: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    vld $vr0, $a1, 0 +; LA64-NEXT:    vld $vr1, $a2, 0 +; LA64-NEXT:    vavgr.du $vr0, $vr0, $vr1 +; LA64-NEXT:    vst $vr0, $a0, 0 +; LA64-NEXT:    ret  entry:    %va = load <2 x i64>, ptr %a    %vb = load <2 x i64>, ptr %b diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll new file mode 100644 index 0000000..bb4df64 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll @@ -0,0 +1,379 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavg_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.b $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i8>, ptr %a +  %vb = 
load <16 x i8>, ptr %b +  %ea = sext <16 x i8> %va to <16 x i16> +  %eb = sext <16 x i8> %vb to <16 x i16> +  %add = add <16 x i16> %ea, %eb +  %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <16 x i16> %shr to <16 x i8> +  store <16 x i8> %r, ptr %res +  ret void +} + +define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavg_h: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.h $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i16>, ptr %a +  %vb = load <8 x i16>, ptr %b +  %ea = sext <8 x i16> %va to <8 x i32> +  %eb = sext <8 x i16> %vb to <8 x i32> +  %add = add <8 x i32> %ea, %eb +  %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <8 x i32> %shr to <8 x i16> +  store <8 x i16> %r, ptr %res +  ret void +} + +define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavg_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.w $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i32>, ptr %a +  %vb = load <4 x i32>, ptr %b +  %ea = sext <4 x i32> %va to <4 x i64> +  %eb = sext <4 x i32> %vb to <4 x i64> +  %add = add <4 x i64> %ea, %eb +  %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  %r = trunc <4 x i64> %shr to <4 x i32> +  store <4 x i32> %r, ptr %res +  ret void +} + +define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavg_d: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.d $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.d $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <2 x i64>, ptr %a +  %vb = load <2 x i64>, ptr %b +  %ea = sext <2 x i64> %va to <2 x i128> +  %eb = sext <2 x i64> %vb to <2 x i128> +  %add = add <2 x i128> %ea, %eb +  %shr = lshr <2 x i128> %add, <i128 1, i128 1> +  %r = trunc <2 x i128> %shr to <2 x i64> +  store <2 x i64> %r, ptr %res +  ret void +} + +define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavg_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.b $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i8>, ptr %a +  %vb = load <16 x i8>, ptr %b +  %ea = zext <16 x i8> %va to <16 x i16> +  %eb = zext <16 x i8> %vb to <16 x i16> +  %add = add <16 x i16> %ea, %eb +  %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <16 x i16> %shr to <16 x i8> +  store <16 x i8> %r, ptr %res +  ret void +} + +define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: 
vavg_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.h $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i16>, ptr %a +  %vb = load <8 x i16>, ptr %b +  %ea = zext <8 x i16> %va to <8 x i32> +  %eb = zext <8 x i16> %vb to <8 x i32> +  %add = add <8 x i32> %ea, %eb +  %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <8 x i32> %shr to <8 x i16> +  store <8 x i16> %r, ptr %res +  ret void +} + +define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavg_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.w $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i32>, ptr %a +  %vb = load <4 x i32>, ptr %b +  %ea = zext <4 x i32> %va to <4 x i64> +  %eb = zext <4 x i32> %vb to <4 x i64> +  %add = add <4 x i64> %ea, %eb +  %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  %r = trunc <4 x i64> %shr to <4 x i32> +  store <4 x i32> %r, ptr %res +  ret void +} + +define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavg_du: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vand.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.d $vr0, $vr0, 1 +; CHECK-NEXT:    vadd.d $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <2 x i64>, ptr %a +  %vb = load <2 x i64>, ptr %b +  %ea = zext <2 x i64> %va to <2 x i128> +  %eb = zext <2 x i64> %vb to <2 x i128> +  %add = add <2 x i128> %ea, %eb +  %shr = lshr <2 x i128> %add, <i128 1, i128 1> +  %r = trunc <2 x i128> %shr to <2 x i64> +  store <2 x i64> %r, ptr %res +  ret void +} + +define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.b $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i8>, ptr %a +  %vb = load <16 x i8>, ptr %b +  %ea = sext <16 x i8> %va to <16 x i16> +  %eb = sext <16 x i8> %vb to <16 x i16> +  %add = add <16 x i16> %ea, %eb +  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <16 x i16> %shr to <16 x i8> +  store <16 x i8> %r, ptr %res +  ret void +} + +define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_h: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.h $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst 
$vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i16>, ptr %a +  %vb = load <8 x i16>, ptr %b +  %ea = sext <8 x i16> %va to <8 x i32> +  %eb = sext <8 x i16> %vb to <8 x i32> +  %add = add <8 x i32> %ea, %eb +  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <8 x i32> %shr to <8 x i16> +  store <8 x i16> %r, ptr %res +  ret void +} + +define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.w $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i32>, ptr %a +  %vb = load <4 x i32>, ptr %b +  %ea = sext <4 x i32> %va to <4 x i64> +  %eb = sext <4 x i32> %vb to <4 x i64> +  %add = add <4 x i64> %ea, %eb +  %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1> +  %r = trunc <4 x i64> %shr to <4 x i32> +  store <4 x i32> %r, ptr %res +  ret void +} + +define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_d: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrai.d $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.d $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <2 x i64>, ptr %a +  %vb = load <2 x i64>, ptr %b +  %ea = sext <2 x i64> %va to <2 x i128> +  %eb = sext <2 x i64> %vb to <2 x i128> +  %add = add <2 x i128> %ea, %eb +  %add1 = add <2 x i128> %add, <i128 1, i128 1> +  %shr = lshr <2 x i128> %add1, <i128 1, i128 1> +  %r = trunc <2 x i128> %shr to <2 x i64> +  store <2 x i64> %r, ptr %res +  ret void +} + +define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.b $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i8>, ptr %a +  %vb = load <16 x i8>, ptr %b +  %ea = zext <16 x i8> %va to <16 x i16> +  %eb = zext <16 x i8> %vb to <16 x i16> +  %add = add <16 x i16> %ea, %eb +  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <16 x i16> %shr to <16 x i8> +  store <16 x i8> %r, ptr %res +  ret void +} + +define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.h $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i16>, ptr %a +  %vb = load <8 x i16>, 
ptr %b +  %ea = zext <8 x i16> %va to <8 x i32> +  %eb = zext <8 x i16> %vb to <8 x i32> +  %add = add <8 x i32> %ea, %eb +  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <8 x i32> %shr to <8 x i16> +  store <8 x i16> %r, ptr %res +  ret void +} + +define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.w $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i32>, ptr %a +  %vb = load <4 x i32>, ptr %b +  %ea = zext <4 x i32> %va to <4 x i64> +  %eb = zext <4 x i32> %vb to <4 x i64> +  %add = add <4 x i64> %ea, %eb +  %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1> +  %r = trunc <4 x i64> %shr to <4 x i32> +  store <4 x i32> %r, ptr %res +  ret void +} + +define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: vavgr_du: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vld $vr1, $a2, 0 +; CHECK-NEXT:    vor.v $vr2, $vr0, $vr1 +; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT:    vsrli.d $vr0, $vr0, 1 +; CHECK-NEXT:    vsub.d $vr0, $vr2, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <2 x i64>, ptr %a +  %vb = load <2 x i64>, ptr %b +  %ea = zext <2 x i64> %va to <2 x i128> +  %eb = zext <2 x i64> %vb to <2 x i128> +  %add = add <2 x i128> %ea, %eb +  %add1 = add <2 x i128> %add, <i128 1, i128 1> +  %shr = lshr <2 x i128> %add1, <i128 1, i128 1> +  %r = trunc <2 x i128> %shr to <2 x i64> +  store <2 x i64> %r, ptr %res +  ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll index e5e75ec..87cc7c6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll @@ -1,98 +1,17 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s  declare <4 x float> @llvm.log2.v4f32(<4 x float>)  declare <2 x double> @llvm.log2.v2f64(<2 x double>)  define void @flog2_v4f32(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f32: -; LA32:       # %bb.0: # %entry -; LA32-NEXT:    addi.w $sp, $sp, -48 -; LA32-NEXT:    st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32-NEXT:    st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32-NEXT:    vld $vr0, $a1, 0 -; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT:    move $fp, $a0 -; LA32-NEXT:    vreplvei.w $vr0, $vr0, 1 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vreplvei.w $vr0, $vr0, 0 -; LA32-NEXT:  
  # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vreplvei.w $vr0, $vr0, 2 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA32-NEXT:    vst $vr1, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vreplvei.w $vr0, $vr0, 3 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA32-NEXT:    vst $vr1, $fp, 0 -; LA32-NEXT:    ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32-NEXT:    ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32-NEXT:    addi.w $sp, $sp, 48 -; LA32-NEXT:    ret -; -; LA64-LABEL: flog2_v4f32: -; LA64:       # %bb.0: # %entry -; LA64-NEXT:    addi.d $sp, $sp, -48 -; LA64-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64-NEXT:    vld $vr0, $a1, 0 -; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT:    move $fp, $a0 -; LA64-NEXT:    vreplvei.w $vr0, $vr0, 1 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vreplvei.w $vr0, $vr0, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA64-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vreplvei.w $vr0, $vr0, 2 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA64-NEXT:    vst $vr1, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT:    vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vreplvei.w $vr0, $vr0, 3 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA64-NEXT:    vst $vr1, $fp, 0 -; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64-NEXT:    addi.d $sp, $sp, 48 -; LA64-NEXT:    ret +; CHECK-LABEL: flog2_v4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vflogb.s $vr0, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret  entry:    %v = 
load <4 x float>, ptr %a    %r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v) @@ -101,59 +20,12 @@ entry:  }  define void @flog2_v2f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v2f64: -; LA32:       # %bb.0: # %entry -; LA32-NEXT:    addi.w $sp, $sp, -48 -; LA32-NEXT:    st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32-NEXT:    st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32-NEXT:    vld $vr0, $a1, 0 -; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT:    move $fp, $a0 -; LA32-NEXT:    vreplvei.d $vr0, $vr0, 1 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT:    vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT:    vreplvei.d $vr0, $vr0, 0 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA32-NEXT:    vst $vr0, $fp, 0 -; LA32-NEXT:    ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32-NEXT:    ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32-NEXT:    addi.w $sp, $sp, 48 -; LA32-NEXT:    ret -; -; LA64-LABEL: flog2_v2f64: -; LA64:       # %bb.0: # %entry -; LA64-NEXT:    addi.d $sp, $sp, -48 -; LA64-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64-NEXT:    vld $vr0, $a1, 0 -; LA64-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT:    move $fp, $a0 -; LA64-NEXT:    vreplvei.d $vr0, $vr0, 1 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT:    vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT:    vreplvei.d $vr0, $vr0, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA64-NEXT:    vst $vr0, $fp, 0 -; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64-NEXT:    addi.d $sp, $sp, 48 -; LA64-NEXT:    ret +; CHECK-LABEL: flog2_v2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vld $vr0, $a1, 0 +; CHECK-NEXT:    vflogb.d $vr0, $vr0 +; CHECK-NEXT:    vst $vr0, $a0, 0 +; CHECK-NEXT:    ret  entry:    %v = load <2 x double>, ptr %a    %r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll new file mode 100644 index 0000000..785d9fc --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll @@ -0,0 +1,1575 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare void @llvm.riscv.vse.nxv1i64( +  <vscale x 1 x 
i64>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i1> %2, +    iXLen %3) + +  ret void +} + +define void @intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    <vscale x 1 x i1> splat (i1 true), +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    <vscale x 2 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret 
+entry: +  call void @llvm.riscv.vse.mask.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    <vscale x 4 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    <vscale x 8 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv1f64( +  <vscale x 1 x double>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1f64_nxv1f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv1f64( +    <vscale x 1 x double> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv1f64( +  <vscale x 1 x double>, +  ptr, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f64_nxv1f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1f64( +    <vscale x 1 x double> %0, +    ptr %1, +    <vscale x 1 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv2f64( +  <vscale x 2 x double>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2f64_nxv2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv2f64( +    <vscale x 2 x double> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv2f64( +  <vscale x 2 x double>, +  ptr, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f64_nxv2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv2f64( +    <vscale x 2 x double> %0, +    ptr %1, +    <vscale x 2 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv4f64( +  <vscale x 4 x double>, +  ptr, +  iXLen); + +define void 
@intrinsic_vse_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4f64_nxv4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv4f64( +    <vscale x 4 x double> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv4f64( +  <vscale x 4 x double>, +  ptr, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f64_nxv4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv4f64( +    <vscale x 4 x double> %0, +    ptr %1, +    <vscale x 4 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv8f64( +  <vscale x 8 x double>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8f64_nxv8f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv8f64( +    <vscale x 8 x double> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv8f64( +  <vscale x 8 x double>, +  ptr, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f64_nxv8f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT:    vse64.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv8f64( +    <vscale x 8 x double> %0, +    ptr %1, +    <vscale x 8 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    <vscale x 1 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; 
CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    <vscale x 2 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    <vscale x 4 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    <vscale x 8 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  <vscale x 16 x i1>, +  iXLen); + +define void 
@intrinsic_vse_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    <vscale x 16 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv1f32( +  <vscale x 1 x float>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1f32_nxv1f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv1f32( +    <vscale x 1 x float> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv1f32( +  <vscale x 1 x float>, +  ptr, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f32_nxv1f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1f32( +    <vscale x 1 x float> %0, +    ptr %1, +    <vscale x 1 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv2f32( +  <vscale x 2 x float>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2f32_nxv2f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv2f32( +    <vscale x 2 x float> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv2f32( +  <vscale x 2 x float>, +  ptr, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f32_nxv2f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv2f32( +    <vscale x 2 x float> %0, +    ptr %1, +    <vscale x 2 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv4f32( +  <vscale x 4 x float>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4f32_nxv4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv4f32( +    <vscale x 4 x float> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv4f32( +  <vscale x 4 x float>, +  ptr, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f32_nxv4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma +; 
CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv4f32( +    <vscale x 4 x float> %0, +    ptr %1, +    <vscale x 4 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv8f32( +  <vscale x 8 x float>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8f32_nxv8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv8f32( +    <vscale x 8 x float> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv8f32( +  <vscale x 8 x float>, +  ptr, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f32_nxv8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv8f32( +    <vscale x 8 x float> %0, +    ptr %1, +    <vscale x 8 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv16f32( +  <vscale x 16 x float>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv16f32_nxv16f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv16f32( +    <vscale x 16 x float> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv16f32( +  <vscale x 16 x float>, +  ptr, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv16f32_nxv16f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT:    vse32.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv16f32( +    <vscale x 16 x float> %0, +    ptr %1, +    <vscale x 16 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    <vscale x 1 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vse.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    <vscale x 2 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    <vscale x 4 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    <vscale x 8 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, 
m4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    <vscale x 16 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    <vscale x 32 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv1f16( +  <vscale x 1 x half>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1f16_nxv1f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv1f16( +    <vscale x 1 x half> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv1f16( +  <vscale x 1 x half>, +  ptr, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f16_nxv1f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1f16( +    <vscale x 1 x half> %0, +    ptr %1, +    <vscale x 1 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv2f16( +  <vscale x 2 x half>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2f16_nxv2f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv2f16( +    <vscale x 2 x half> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv2f16( +  <vscale x 2 x 
half>, +  ptr, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f16_nxv2f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv2f16( +    <vscale x 2 x half> %0, +    ptr %1, +    <vscale x 2 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv4f16( +  <vscale x 4 x half>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4f16_nxv4f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv4f16( +    <vscale x 4 x half> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv4f16( +  <vscale x 4 x half>, +  ptr, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f16_nxv4f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv4f16( +    <vscale x 4 x half> %0, +    ptr %1, +    <vscale x 4 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv8f16( +  <vscale x 8 x half>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8f16_nxv8f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv8f16( +    <vscale x 8 x half> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv8f16( +  <vscale x 8 x half>, +  ptr, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f16_nxv8f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv8f16( +    <vscale x 8 x half> %0, +    ptr %1, +    <vscale x 8 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv16f16( +  <vscale x 16 x half>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv16f16_nxv16f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv16f16( +    <vscale x 16 x half> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv16f16( +  <vscale x 16 x half>, +  ptr, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv16f16_nxv16f16: +; CHECK:       # %bb.0: # 
%entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv16f16( +    <vscale x 16 x half> %0, +    ptr %1, +    <vscale x 16 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv32f16( +  <vscale x 32 x half>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv32f16_nxv32f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv32f16( +    <vscale x 32 x half> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv32f16( +  <vscale x 32 x half>, +  ptr, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv32f16_nxv32f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT:    vse16.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv32f16( +    <vscale x 32 x half> %0, +    ptr %1, +    <vscale x 32 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    <vscale x 1 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    <vscale x 2 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void 
@llvm.riscv.vse.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    <vscale x 4 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    <vscale x 8 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    <vscale x 16 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0) +; 
CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    <vscale x 32 x i1> %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vse.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  iXLen); + +define void @intrinsic_vse_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    iXLen %2) + +  ret void +} + +declare void @llvm.riscv.vse.mask.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  <vscale x 64 x i1>, +  iXLen); + +define void @intrinsic_vse_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vse8.v v8, (a0), v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vse.mask.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    <vscale x 64 x i1> %2, +    iXLen %3) + +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll new file mode 100644 index 0000000..5237536 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN:   -global-isel -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN:   -global-isel -verify-machineinstrs | FileCheck %s + +declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, ptr, iXLen); + +define void @intrinsic_vsm_v_nxv1i1(<vscale x 1 x i1> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsm_v_nxv1i1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT:    vsm.v v0, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %0, ptr %1, iXLen %2) +  ret void +} + +declare void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1>, ptr, iXLen); + +define void @intrinsic_vsm_v_nxv2i1(<vscale x 2 x i1> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsm_v_nxv2i1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT:    vsm.v v0, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1> %0, ptr %1, iXLen %2) +  ret void +} + +declare void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1>, ptr, iXLen); + +define void @intrinsic_vsm_v_nxv4i1(<vscale x 4 x i1> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsm_v_nxv4i1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma +; 
CHECK-NEXT:    vsm.v v0, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1> %0, ptr %1, iXLen %2) +  ret void +} + +declare void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1>, ptr, iXLen); + +define void @intrinsic_vsm_v_nxv8i1(<vscale x 8 x i1> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsm_v_nxv8i1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT:    vsm.v v0, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1> %0, ptr %1, iXLen %2) +  ret void +} + +declare void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1>, ptr, iXLen); + +define void @intrinsic_vsm_v_nxv16i1(<vscale x 16 x i1> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsm_v_nxv16i1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT:    vsm.v v0, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1> %0, ptr %1, iXLen %2) +  ret void +} + +declare void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1>, ptr, iXLen); + +define void @intrinsic_vsm_v_nxv32i1(<vscale x 32 x i1> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsm_v_nxv32i1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT:    vsm.v v0, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1> %0, ptr %1, iXLen %2) +  ret void +} + +declare void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1>, ptr, iXLen); + +define void @intrinsic_vsm_v_nxv64i1(<vscale x 64 x i1> %0, ptr %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsm_v_nxv64i1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT:    vsm.v v0, (a0) +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1> %0, ptr %1, iXLen %2) +  ret void +} + +declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( +  <vscale x 1 x i16>, +  <vscale x 1 x i16>, +  iXLen); + +; Make sure we can use the vsetvli from the producing instruction. 
+define void @test_vsetvli_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, ptr %2, iXLen %3) nounwind { +; CHECK-LABEL: test_vsetvli_i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT:    vmseq.vv v8, v8, v9 +; CHECK-NEXT:    vsm.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( +    <vscale x 1 x i16> %0, +    <vscale x 1 x i16> %1, +    iXLen %3) +  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, ptr %2, iXLen %3) +  ret void +} + +declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( +  <vscale x 1 x i32>, +  <vscale x 1 x i32>, +  iXLen); + +define void @test_vsetvli_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, ptr %2, iXLen %3) nounwind { +; CHECK-LABEL: test_vsetvli_i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT:    vmseq.vv v8, v8, v9 +; CHECK-NEXT:    vsm.v v8, (a0) +; CHECK-NEXT:    ret +entry: +  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( +    <vscale x 1 x i32> %0, +    <vscale x 1 x i32> %1, +    iXLen %3) +  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, ptr %2, iXLen %3) +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll new file mode 100644 index 0000000..b7609ff --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll @@ -0,0 +1,1724 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \ +; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare void @llvm.riscv.vsse.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1i64( +  <vscale x 1 x i64>, +  ptr, +  iXLen, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +define void @intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1i64( +    <vscale x 1 x i64> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> splat (i1 true), +    iXLen %4) + +  
ret void +} + +declare void @llvm.riscv.vsse.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2i64( +  <vscale x 2 x i64>, +  ptr, +  iXLen, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i64_nxv2i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv2i64( +    <vscale x 2 x i64> %0, +    ptr %1, +    iXLen %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4i64( +  <vscale x 4 x i64>, +  ptr, +  iXLen, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i64_nxv4i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv4i64( +    <vscale x 4 x i64> %0, +    ptr %1, +    iXLen %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8i64( +  <vscale x 8 x i64>, +  ptr, +  iXLen, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i64_nxv8i64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv8i64( +    <vscale x 8 x i64> %0, +    ptr %1, +    iXLen %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void 
@llvm.riscv.vsse.nxv1f64( +  <vscale x 1 x double>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1f64_nxv1f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv1f64( +    <vscale x 1 x double> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1f64( +  <vscale x 1 x double>, +  ptr, +  iXLen, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f64_nxv1f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1f64( +    <vscale x 1 x double> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv2f64( +  <vscale x 2 x double>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2f64_nxv2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv2f64( +    <vscale x 2 x double> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2f64( +  <vscale x 2 x double>, +  ptr, +  iXLen, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f64_nxv2f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv2f64( +    <vscale x 2 x double> %0, +    ptr %1, +    iXLen %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv4f64( +  <vscale x 4 x double>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4f64_nxv4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv4f64( +    <vscale x 4 x double> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4f64( +  <vscale x 4 x double>, +  ptr, +  iXLen, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f64_nxv4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv4f64( +    <vscale x 4 x double> %0, +    ptr %1, +    iXLen %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void 
+} + +declare void @llvm.riscv.vsse.nxv8f64( +  <vscale x 8 x double>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8f64_nxv8f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv8f64( +    <vscale x 8 x double> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8f64( +  <vscale x 8 x double>, +  ptr, +  iXLen, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f64_nxv8f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv8f64( +    <vscale x 8 x double> %0, +    ptr %1, +    iXLen %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1i32( +  <vscale x 1 x i32>, +  ptr, +  iXLen, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i32_nxv1i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1i32( +    <vscale x 1 x i32> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2i32( +  <vscale x 2 x i32>, +  ptr, +  iXLen, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i32_nxv2i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv2i32( +    <vscale x 2 x i32> %0, +    ptr %1, +    iXLen %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare 
void @llvm.riscv.vsse.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4i32( +  <vscale x 4 x i32>, +  ptr, +  iXLen, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i32_nxv4i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv4i32( +    <vscale x 4 x i32> %0, +    ptr %1, +    iXLen %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8i32( +  <vscale x 8 x i32>, +  ptr, +  iXLen, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i32_nxv8i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv8i32( +    <vscale x 8 x i32> %0, +    ptr %1, +    iXLen %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv16i32( +  <vscale x 16 x i32>, +  ptr, +  iXLen, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i32_nxv16i32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv16i32( +    <vscale x 16 x i32> %0, +    ptr %1, +    iXLen %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void 
@llvm.riscv.vsse.nxv1f32( +  <vscale x 1 x float>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1f32_nxv1f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv1f32( +    <vscale x 1 x float> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1f32( +  <vscale x 1 x float>, +  ptr, +  iXLen, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f32_nxv1f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1f32( +    <vscale x 1 x float> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv2f32( +  <vscale x 2 x float>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2f32_nxv2f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv2f32( +    <vscale x 2 x float> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2f32( +  <vscale x 2 x float>, +  ptr, +  iXLen, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f32_nxv2f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv2f32( +    <vscale x 2 x float> %0, +    ptr %1, +    iXLen %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv4f32( +  <vscale x 4 x float>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4f32_nxv4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv4f32( +    <vscale x 4 x float> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4f32( +  <vscale x 4 x float>, +  ptr, +  iXLen, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f32_nxv4f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv4f32( +    <vscale x 4 x float> %0, +    ptr %1, +    iXLen %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare 
void @llvm.riscv.vsse.nxv8f32( +  <vscale x 8 x float>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8f32_nxv8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv8f32( +    <vscale x 8 x float> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8f32( +  <vscale x 8 x float>, +  ptr, +  iXLen, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f32_nxv8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv8f32( +    <vscale x 8 x float> %0, +    ptr %1, +    iXLen %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv16f32( +  <vscale x 16 x float>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv16f32_nxv16f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv16f32( +    <vscale x 16 x float> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv16f32( +  <vscale x 16 x float>, +  ptr, +  iXLen, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16f32_nxv16f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv16f32( +    <vscale x 16 x float> %0, +    ptr %1, +    iXLen %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1i16( +  <vscale x 1 x i16>, +  ptr, +  iXLen, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i16_nxv1i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1i16( +    <vscale x 1 x i16> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} 
+ +declare void @llvm.riscv.vsse.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2i16( +  <vscale x 2 x i16>, +  ptr, +  iXLen, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i16_nxv2i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv2i16( +    <vscale x 2 x i16> %0, +    ptr %1, +    iXLen %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4i16( +  <vscale x 4 x i16>, +  ptr, +  iXLen, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i16_nxv4i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv4i16( +    <vscale x 4 x i16> %0, +    ptr %1, +    iXLen %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8i16( +  <vscale x 8 x i16>, +  ptr, +  iXLen, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i16_nxv8i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv8i16( +    <vscale x 8 x i16> %0, +    ptr %1, +    iXLen %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void 
@llvm.riscv.vsse.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv16i16( +  <vscale x 16 x i16>, +  ptr, +  iXLen, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i16_nxv16i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv16i16( +    <vscale x 16 x i16> %0, +    ptr %1, +    iXLen %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv32i16( +  <vscale x 32 x i16>, +  ptr, +  iXLen, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32i16_nxv32i16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv32i16( +    <vscale x 32 x i16> %0, +    ptr %1, +    iXLen %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv1f16( +  <vscale x 1 x half>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1f16_nxv1f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv1f16( +    <vscale x 1 x half> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1f16( +  <vscale x 1 x half>, +  ptr, +  iXLen, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f16_nxv1f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1f16( +    <vscale x 1 x half> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + 
+declare void @llvm.riscv.vsse.nxv2f16( +  <vscale x 2 x half>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2f16_nxv2f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv2f16( +    <vscale x 2 x half> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2f16( +  <vscale x 2 x half>, +  ptr, +  iXLen, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f16_nxv2f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv2f16( +    <vscale x 2 x half> %0, +    ptr %1, +    iXLen %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv4f16( +  <vscale x 4 x half>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4f16_nxv4f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv4f16( +    <vscale x 4 x half> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4f16( +  <vscale x 4 x half>, +  ptr, +  iXLen, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f16_nxv4f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv4f16( +    <vscale x 4 x half> %0, +    ptr %1, +    iXLen %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv8f16( +  <vscale x 8 x half>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8f16_nxv8f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv8f16( +    <vscale x 8 x half> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8f16( +  <vscale x 8 x half>, +  ptr, +  iXLen, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f16_nxv8f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv8f16( +    <vscale x 8 x half> %0, +    ptr %1, +    iXLen %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void 
@llvm.riscv.vsse.nxv16f16( +  <vscale x 16 x half>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv16f16_nxv16f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv16f16( +    <vscale x 16 x half> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv16f16( +  <vscale x 16 x half>, +  ptr, +  iXLen, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16f16_nxv16f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv16f16( +    <vscale x 16 x half> %0, +    ptr %1, +    iXLen %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv32f16( +  <vscale x 32 x half>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv32f16_nxv32f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv32f16( +    <vscale x 32 x half> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv32f16( +  <vscale x 32 x half>, +  ptr, +  iXLen, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32f16_nxv32f16: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv32f16( +    <vscale x 32 x half> %0, +    ptr %1, +    iXLen %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv1i8( +  <vscale x 1 x i8>, +  ptr, +  iXLen, +  <vscale x 1 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i8_nxv1i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv1i8( +    <vscale x 1 x i8> %0, +    ptr %1, +    iXLen %2, +    <vscale x 1 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void 
@llvm.riscv.vsse.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv2i8( +  <vscale x 2 x i8>, +  ptr, +  iXLen, +  <vscale x 2 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i8_nxv2i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv2i8( +    <vscale x 2 x i8> %0, +    ptr %1, +    iXLen %2, +    <vscale x 2 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv4i8( +  <vscale x 4 x i8>, +  ptr, +  iXLen, +  <vscale x 4 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i8_nxv4i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv4i8( +    <vscale x 4 x i8> %0, +    ptr %1, +    iXLen %2, +    <vscale x 4 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv8i8( +  <vscale x 8 x i8>, +  ptr, +  iXLen, +  <vscale x 8 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i8_nxv8i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv8i8( +    <vscale x 8 x i8> %0, +    ptr %1, +    iXLen %2, +    <vscale x 8 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  iXLen, +  iXLen); + +define void 
@intrinsic_vsse_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv16i8( +  <vscale x 16 x i8>, +  ptr, +  iXLen, +  <vscale x 16 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i8_nxv16i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv16i8( +    <vscale x 16 x i8> %0, +    ptr %1, +    iXLen %2, +    <vscale x 16 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv32i8( +  <vscale x 32 x i8>, +  ptr, +  iXLen, +  <vscale x 32 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32i8_nxv32i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv32i8( +    <vscale x 32 x i8> %0, +    ptr %1, +    iXLen %2, +    <vscale x 32 x i1> %3, +    iXLen %4) + +  ret void +} + +declare void @llvm.riscv.vsse.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  iXLen, +  iXLen); + +define void @intrinsic_vsse_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vsse_v_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1 +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    iXLen %2, +    iXLen %3) + +  ret void +} + +declare void @llvm.riscv.vsse.mask.nxv64i8( +  <vscale x 64 x i8>, +  ptr, +  iXLen, +  <vscale x 64 x i1>, +  iXLen); + +define void @intrinsic_vsse_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2, <vscale x 64 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsse_mask_v_nxv64i8_nxv64i8: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t +; CHECK-NEXT:    ret +entry: +  call void @llvm.riscv.vsse.mask.nxv64i8( +    <vscale x 64 x i8> %0, +    ptr %1, +    iXLen %2, +    <vscale x 64 x i1> %3, +    iXLen %4) + +  ret void +} diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 37e11db..988d049 100644 --- 
a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -136,6 +136,7 @@  ; CHECK-NEXT:   shgatpa                          - 'Shgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare).  ; CHECK-NEXT:   shifted-zextw-fusion             - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension.  ; CHECK-NEXT:   shlcofideleg                     - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode). +; CHECK-NEXT:   short-forward-branch-i-minmax    - Enable short forward branch optimization for min,max instructions in Zbb.  ; CHECK-NEXT:   short-forward-branch-opt         - Enable short forward branch optimization.  ; CHECK-NEXT:   shtvala                          - 'Shtvala' (htval provides all needed values).  ; CHECK-NEXT:   shvsatpa                         - 'Shvsatpa' (vsatp supports all modes supported by satp). diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll new file mode 100644 index 0000000..05e06cea --- /dev/null +++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll @@ -0,0 +1,703 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb | FileCheck %s --check-prefixes=RV32I-ZBB +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=RV64I-ZBB +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-opt | \ +; RUN:   FileCheck %s --check-prefixes=RV32I-SFB-ZBB +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-opt | \ +; RUN:   FileCheck %s --check-prefixes=RV64I-SFB-ZBB +; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-i-minmax | \ +; RUN:   FileCheck %s --check-prefixes=RV32I-SFBIMinMax-ZBB +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-i-minmax | \ +; RUN:   FileCheck %s --check-prefixes=RV64I-SFBIMinMax-ZBB + +define i32 @select_example_smax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-ZBB-LABEL: select_example_smax: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beqz a2, .LBB0_2 +; RV32I-ZBB-NEXT:  # %bb.1: +; RV32I-ZBB-NEXT:    max a1, a0, a3 +; RV32I-ZBB-NEXT:  .LBB0_2: # %entry +; RV32I-ZBB-NEXT:    mv a0, a1 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_smax: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB0_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    sext.w a3, a3 +; RV64I-ZBB-NEXT:    sext.w a0, a0 +; RV64I-ZBB-NEXT:    max a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB0_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smax: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    max a0, a0, a3 +; RV32I-SFB-ZBB-NEXT:    bnez a2, .LBB0_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a1 +; RV32I-SFB-ZBB-NEXT:  .LBB0_2: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smax: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT:    max a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB0_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  .LBB0_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax: +; RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; 
RV32I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB0_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    max a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB0_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB0_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    max a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB0_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i32 @llvm.smax.i32(i32 %a, i32 %y) +  %sel = select i1 %x, i32 %res, i32 %b +  ret i32 %sel +} + +define i32 @select_example_smin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-ZBB-LABEL: select_example_smin: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beqz a2, .LBB1_2 +; RV32I-ZBB-NEXT:  # %bb.1: +; RV32I-ZBB-NEXT:    min a1, a0, a3 +; RV32I-ZBB-NEXT:  .LBB1_2: # %entry +; RV32I-ZBB-NEXT:    mv a0, a1 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_smin: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB1_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    sext.w a3, a3 +; RV64I-ZBB-NEXT:    sext.w a0, a0 +; RV64I-ZBB-NEXT:    min a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB1_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smin: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    min a0, a0, a3 +; RV32I-SFB-ZBB-NEXT:    bnez a2, .LBB1_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a1 +; RV32I-SFB-ZBB-NEXT:  .LBB1_2: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smin: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT:    min a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB1_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  .LBB1_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin: +; RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB1_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    min a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB1_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB1_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    min a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB1_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i32 @llvm.smin.i32(i32 %a, i32 %y) +  %sel = select i1 %x, i32 %res, i32 %b +  ret i32 %sel +} + +define i32 @select_example_umax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-ZBB-LABEL: select_example_umax: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beqz a2, .LBB2_2 +; RV32I-ZBB-NEXT:  # %bb.1: +; RV32I-ZBB-NEXT:    maxu a1, a0, a3 +; 
RV32I-ZBB-NEXT:  .LBB2_2: # %entry +; RV32I-ZBB-NEXT:    mv a0, a1 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_umax: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB2_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    sext.w a3, a3 +; RV64I-ZBB-NEXT:    sext.w a0, a0 +; RV64I-ZBB-NEXT:    maxu a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB2_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umax: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    maxu a0, a0, a3 +; RV32I-SFB-ZBB-NEXT:    bnez a2, .LBB2_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a1 +; RV32I-SFB-ZBB-NEXT:  .LBB2_2: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umax: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT:    maxu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB2_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  .LBB2_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax: +; RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB2_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    maxu a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB2_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB2_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    maxu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB2_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i32 @llvm.umax.i32(i32 %a, i32 %y) +  %sel = select i1 %x, i32 %res, i32 %b +  ret i32 %sel +} + +define i32 @select_example_umin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) { +; RV32I-ZBB-LABEL: select_example_umin: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beqz a2, .LBB3_2 +; RV32I-ZBB-NEXT:  # %bb.1: +; RV32I-ZBB-NEXT:    minu a1, a0, a3 +; RV32I-ZBB-NEXT:  .LBB3_2: # %entry +; RV32I-ZBB-NEXT:    mv a0, a1 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_umin: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB3_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    sext.w a3, a3 +; RV64I-ZBB-NEXT:    sext.w a0, a0 +; RV64I-ZBB-NEXT:    minu a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB3_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umin: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    minu a0, a0, a3 +; RV32I-SFB-ZBB-NEXT:    bnez a2, .LBB3_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a1 +; RV32I-SFB-ZBB-NEXT:  .LBB3_2: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umin: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFB-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFB-ZBB-NEXT:    minu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB3_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  
.LBB3_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin: +; RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB3_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    minu a1, a0, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB3_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a3, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:    sext.w a0, a0 +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB3_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    minu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB3_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i32 @llvm.umin.i32(i32 %a, i32 %y) +  %sel = select i1 %x, i32 %res, i32 %b +  ret i32 %sel +} + +define i64 @select_example_smax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-ZBB-LABEL: select_example_smax_1: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beq a1, a6, .LBB4_2 +; RV32I-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-ZBB-NEXT:    slt a7, a6, a1 +; RV32I-ZBB-NEXT:    beqz a7, .LBB4_3 +; RV32I-ZBB-NEXT:    j .LBB4_4 +; RV32I-ZBB-NEXT:  .LBB4_2: +; RV32I-ZBB-NEXT:    sltu a7, a5, a0 +; RV32I-ZBB-NEXT:    bnez a7, .LBB4_4 +; RV32I-ZBB-NEXT:  .LBB4_3: # %entry +; RV32I-ZBB-NEXT:    mv a1, a6 +; RV32I-ZBB-NEXT:    mv a0, a5 +; RV32I-ZBB-NEXT:  .LBB4_4: # %entry +; RV32I-ZBB-NEXT:    beqz a4, .LBB4_6 +; RV32I-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-ZBB-NEXT:    ret +; RV32I-ZBB-NEXT:  .LBB4_6: # %entry +; RV32I-ZBB-NEXT:    mv a0, a2 +; RV32I-ZBB-NEXT:    mv a1, a3 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_smax_1: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB4_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    max a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB4_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smax_1: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    sltu a7, a5, a0 +; RV32I-SFB-ZBB-NEXT:    slt t0, a6, a1 +; RV32I-SFB-ZBB-NEXT:    bne a1, a6, .LBB4_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv t0, a7 +; RV32I-SFB-ZBB-NEXT:  .LBB4_2: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB4_4 +; RV32I-SFB-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a6 +; RV32I-SFB-ZBB-NEXT:  .LBB4_4: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB4_6 +; RV32I-SFB-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a5 +; RV32I-SFB-ZBB-NEXT:  .LBB4_6: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB4_8 +; RV32I-SFB-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a2 +; RV32I-SFB-ZBB-NEXT:  .LBB4_8: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB4_10 +; RV32I-SFB-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a3 +; RV32I-SFB-ZBB-NEXT:  .LBB4_10: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smax_1: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    max a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB4_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  .LBB4_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax_1: +; 
RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    sltu a7, a5, a0 +; RV32I-SFBIMinMax-ZBB-NEXT:    slt t0, a6, a1 +; RV32I-SFBIMinMax-ZBB-NEXT:    bne a1, a6, .LBB4_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB4_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB4_4 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB4_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB4_6 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB4_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB4_8 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB4_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB4_10 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB4_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax_1: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB4_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    max a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB4_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i64 @llvm.smax.i64(i64 %a, i64 %y) +  %sel = select i1 %x, i64 %res, i64 %b +  ret i64 %sel +} + +define i64 @select_example_smin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-ZBB-LABEL: select_example_smin_1: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beq a1, a6, .LBB5_2 +; RV32I-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-ZBB-NEXT:    slt a7, a1, a6 +; RV32I-ZBB-NEXT:    beqz a7, .LBB5_3 +; RV32I-ZBB-NEXT:    j .LBB5_4 +; RV32I-ZBB-NEXT:  .LBB5_2: +; RV32I-ZBB-NEXT:    sltu a7, a0, a5 +; RV32I-ZBB-NEXT:    bnez a7, .LBB5_4 +; RV32I-ZBB-NEXT:  .LBB5_3: # %entry +; RV32I-ZBB-NEXT:    mv a1, a6 +; RV32I-ZBB-NEXT:    mv a0, a5 +; RV32I-ZBB-NEXT:  .LBB5_4: # %entry +; RV32I-ZBB-NEXT:    beqz a4, .LBB5_6 +; RV32I-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-ZBB-NEXT:    ret +; RV32I-ZBB-NEXT:  .LBB5_6: # %entry +; RV32I-ZBB-NEXT:    mv a0, a2 +; RV32I-ZBB-NEXT:    mv a1, a3 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_smin_1: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB5_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    min a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB5_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_smin_1: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    sltu a7, a0, a5 +; RV32I-SFB-ZBB-NEXT:    slt t0, a1, a6 +; RV32I-SFB-ZBB-NEXT:    bne a1, a6, .LBB5_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv t0, a7 +; RV32I-SFB-ZBB-NEXT:  .LBB5_2: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB5_4 +; RV32I-SFB-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a6 +; RV32I-SFB-ZBB-NEXT:  .LBB5_4: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB5_6 +; RV32I-SFB-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a5 +; RV32I-SFB-ZBB-NEXT:  .LBB5_6: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB5_8 +; RV32I-SFB-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a2 +; 
RV32I-SFB-ZBB-NEXT:  .LBB5_8: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB5_10 +; RV32I-SFB-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a3 +; RV32I-SFB-ZBB-NEXT:  .LBB5_10: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_smin_1: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    min a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB5_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  .LBB5_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin_1: +; RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    sltu a7, a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT:    slt t0, a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT:    bne a1, a6, .LBB5_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB5_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB5_4 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB5_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB5_6 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB5_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB5_8 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB5_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB5_10 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB5_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin_1: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB5_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    min a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB5_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i64 @llvm.smin.i64(i64 %a, i64 %y) +  %sel = select i1 %x, i64 %res, i64 %b +  ret i64 %sel +} + +define i64 @select_example_umax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-ZBB-LABEL: select_example_umax_1: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beq a1, a6, .LBB6_2 +; RV32I-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-ZBB-NEXT:    sltu a7, a6, a1 +; RV32I-ZBB-NEXT:    beqz a7, .LBB6_3 +; RV32I-ZBB-NEXT:    j .LBB6_4 +; RV32I-ZBB-NEXT:  .LBB6_2: +; RV32I-ZBB-NEXT:    sltu a7, a5, a0 +; RV32I-ZBB-NEXT:    bnez a7, .LBB6_4 +; RV32I-ZBB-NEXT:  .LBB6_3: # %entry +; RV32I-ZBB-NEXT:    mv a1, a6 +; RV32I-ZBB-NEXT:    mv a0, a5 +; RV32I-ZBB-NEXT:  .LBB6_4: # %entry +; RV32I-ZBB-NEXT:    beqz a4, .LBB6_6 +; RV32I-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-ZBB-NEXT:    ret +; RV32I-ZBB-NEXT:  .LBB6_6: # %entry +; RV32I-ZBB-NEXT:    mv a0, a2 +; RV32I-ZBB-NEXT:    mv a1, a3 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_umax_1: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB6_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    maxu a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB6_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umax_1: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    sltu a7, a5, a0 +; RV32I-SFB-ZBB-NEXT:    sltu t0, a6, a1 +; 
RV32I-SFB-ZBB-NEXT:    bne a1, a6, .LBB6_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv t0, a7 +; RV32I-SFB-ZBB-NEXT:  .LBB6_2: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB6_4 +; RV32I-SFB-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a6 +; RV32I-SFB-ZBB-NEXT:  .LBB6_4: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB6_6 +; RV32I-SFB-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a5 +; RV32I-SFB-ZBB-NEXT:  .LBB6_6: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB6_8 +; RV32I-SFB-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a2 +; RV32I-SFB-ZBB-NEXT:  .LBB6_8: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB6_10 +; RV32I-SFB-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a3 +; RV32I-SFB-ZBB-NEXT:  .LBB6_10: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umax_1: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    maxu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB6_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  .LBB6_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax_1: +; RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    sltu a7, a5, a0 +; RV32I-SFBIMinMax-ZBB-NEXT:    sltu t0, a6, a1 +; RV32I-SFBIMinMax-ZBB-NEXT:    bne a1, a6, .LBB6_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB6_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB6_4 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB6_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB6_6 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB6_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB6_8 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB6_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB6_10 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB6_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax_1: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB6_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    maxu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB6_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i64 @llvm.umax.i64(i64 %a, i64 %y) +  %sel = select i1 %x, i64 %res, i64 %b +  ret i64 %sel +} + +define i64 @select_example_umin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) { +; RV32I-ZBB-LABEL: select_example_umin_1: +; RV32I-ZBB:       # %bb.0: # %entry +; RV32I-ZBB-NEXT:    beq a1, a6, .LBB7_2 +; RV32I-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-ZBB-NEXT:    sltu a7, a1, a6 +; RV32I-ZBB-NEXT:    beqz a7, .LBB7_3 +; RV32I-ZBB-NEXT:    j .LBB7_4 +; RV32I-ZBB-NEXT:  .LBB7_2: +; RV32I-ZBB-NEXT:    sltu a7, a0, a5 +; RV32I-ZBB-NEXT:    bnez a7, .LBB7_4 +; RV32I-ZBB-NEXT:  .LBB7_3: # %entry +; RV32I-ZBB-NEXT:    mv a1, a6 +; RV32I-ZBB-NEXT:    mv a0, a5 +; RV32I-ZBB-NEXT:  .LBB7_4: # %entry +; RV32I-ZBB-NEXT:    beqz a4, .LBB7_6 +; RV32I-ZBB-NEXT:  # %bb.5: # %entry +; 
RV32I-ZBB-NEXT:    ret +; RV32I-ZBB-NEXT:  .LBB7_6: # %entry +; RV32I-ZBB-NEXT:    mv a0, a2 +; RV32I-ZBB-NEXT:    mv a1, a3 +; RV32I-ZBB-NEXT:    ret +; +; RV64I-ZBB-LABEL: select_example_umin_1: +; RV64I-ZBB:       # %bb.0: # %entry +; RV64I-ZBB-NEXT:    beqz a2, .LBB7_2 +; RV64I-ZBB-NEXT:  # %bb.1: +; RV64I-ZBB-NEXT:    minu a1, a0, a3 +; RV64I-ZBB-NEXT:  .LBB7_2: # %entry +; RV64I-ZBB-NEXT:    mv a0, a1 +; RV64I-ZBB-NEXT:    ret +; +; RV32I-SFB-ZBB-LABEL: select_example_umin_1: +; RV32I-SFB-ZBB:       # %bb.0: # %entry +; RV32I-SFB-ZBB-NEXT:    sltu a7, a0, a5 +; RV32I-SFB-ZBB-NEXT:    sltu t0, a1, a6 +; RV32I-SFB-ZBB-NEXT:    bne a1, a6, .LBB7_2 +; RV32I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFB-ZBB-NEXT:    mv t0, a7 +; RV32I-SFB-ZBB-NEXT:  .LBB7_2: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB7_4 +; RV32I-SFB-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a6 +; RV32I-SFB-ZBB-NEXT:  .LBB7_4: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez t0, .LBB7_6 +; RV32I-SFB-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a5 +; RV32I-SFB-ZBB-NEXT:  .LBB7_6: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB7_8 +; RV32I-SFB-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a0, a2 +; RV32I-SFB-ZBB-NEXT:  .LBB7_8: # %entry +; RV32I-SFB-ZBB-NEXT:    bnez a4, .LBB7_10 +; RV32I-SFB-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFB-ZBB-NEXT:    mv a1, a3 +; RV32I-SFB-ZBB-NEXT:  .LBB7_10: # %entry +; RV32I-SFB-ZBB-NEXT:    ret +; +; RV64I-SFB-ZBB-LABEL: select_example_umin_1: +; RV64I-SFB-ZBB:       # %bb.0: # %entry +; RV64I-SFB-ZBB-NEXT:    minu a0, a0, a3 +; RV64I-SFB-ZBB-NEXT:    bnez a2, .LBB7_2 +; RV64I-SFB-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFB-ZBB-NEXT:    mv a0, a1 +; RV64I-SFB-ZBB-NEXT:  .LBB7_2: # %entry +; RV64I-SFB-ZBB-NEXT:    ret +; +; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin_1: +; RV32I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    sltu a7, a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT:    sltu t0, a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT:    bne a1, a6, .LBB7_2 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv t0, a7 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB7_2: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB7_4 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.3: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a6 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB7_4: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez t0, .LBB7_6 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.5: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a5 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB7_6: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB7_8 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.7: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a0, a2 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB7_8: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    bnez a4, .LBB7_10 +; RV32I-SFBIMinMax-ZBB-NEXT:  # %bb.9: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    mv a1, a3 +; RV32I-SFBIMinMax-ZBB-NEXT:  .LBB7_10: # %entry +; RV32I-SFBIMinMax-ZBB-NEXT:    ret +; +; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin_1: +; RV64I-SFBIMinMax-ZBB:       # %bb.0: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    beqz a2, .LBB7_2 +; RV64I-SFBIMinMax-ZBB-NEXT:  # %bb.1: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    minu a1, a0, a3 +; RV64I-SFBIMinMax-ZBB-NEXT:  .LBB7_2: # %entry +; RV64I-SFBIMinMax-ZBB-NEXT:    mv a0, a1 +; RV64I-SFBIMinMax-ZBB-NEXT:    ret +entry: +  %res = call i64 @llvm.umin.i64(i64 %a, i64 %y) +  %sel = select i1 %x, i64 %res, i64 %b +  ret i64 %sel +} diff --git 
a/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll new file mode 100644 index 0000000..293b48d --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll @@ -0,0 +1,39 @@ +;; BB section test with basic block hashes. + +;; basic block sections Profile with bb hashes +; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'g 0:10,1:9,2:1 1:8,3:8 2:2,3:2 3:11' >> %t +; RUN: echo 'c 0 2 3' >> %t +; RUN: echo 'h 0:64863A11B5CA0000 1:54F1E80D6B270006 2:54F1F4E66B270008 3:C8BC6041A2CB0009' >> %t +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck %s +; +define void @foo(i1 zeroext) nounwind { +  %2 = alloca i8, align 1 +  %3 = zext i1 %0 to i8 +  store i8 %3, ptr %2, align 1 +  %4 = load i8, ptr %2, align 1 +  %5 = trunc i8 %4 to i1 +  br i1 %5, label %6, label %8 + +6:                                                ; preds = %1 +  %7 = call i32 @bar() +  br label %10 + +8:                                                ; preds = %1 +  %9 = call i32 @baz() +  br label %10 + +10:                                               ; preds = %8, %6 +  ret void +} + +declare i32 @bar() #1 + +declare i32 @baz() #1 + +; CHECK: .section	.text.foo,"ax",@progbits +; CHECK: callq baz +; CHECK: retq +; CHECK: .section	.text.split.foo,"ax",@progbits +; CHECK: callq bar diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll index 751ab76..eb0a14b 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -69,6 +69,20 @@  ; RUN: echo 'g 0:4,1:2:3' >> %t15  ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t15 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR15  ; CHECK-ERROR15: LLVM ERROR: invalid profile {{.*}} at line 4: unsigned integer expected: '2:3' +; RUN: echo 'v1' > %t16 +; RUN: echo 'f dummy1' >> %t16 +; RUN: echo 'c 0 1' >> %t16 +; RUN: echo 'g 0:4,1:2' >> %t16 +; RUN: echo 'h a:1111111111111111 1:ffffffffffffffff' >> %t16 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t16 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR16 +; CHECK-ERROR16: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected: 'a' +; RUN: echo 'v1' > %t17 +; RUN: echo 'f dummy1' >> %t17 +; RUN: echo 'c 0 1' >> %t17 +; RUN: echo 'g 0:4,1:2' >> %t17 +; RUN: echo 'h 0:111111111111111g 1:ffffffffffffffff' >> %t17 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t17 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR17 +; CHECK-ERROR17: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected in hex format: '111111111111111g'  define i32 @dummy1(i32 %x, i32 %y, i32 %z) { diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index cc3dcf3..06e7d47 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {    %cmp = icmp ne i4096 %test, 0    ret i1 %cmp  } + +; Special Cases + +; Multiple uses of the stored value +define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind { +; X86-LABEL: complement_cmpz_i128: +; X86:       # %bb.0: +; X86-NEXT:    pushl %ebp +; X86-NEXT:    movl %esp, %ebp +; X86-NEXT:    pushl 
%ebx +; X86-NEXT:    pushl %edi +; X86-NEXT:    pushl %esi +; X86-NEXT:    andl $-16, %esp +; X86-NEXT:    subl $64, %esp +; X86-NEXT:    movzbl 12(%ebp), %ecx +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl %ecx, %eax +; X86-NEXT:    shrb $3, %al +; X86-NEXT:    andb $12, %al +; X86-NEXT:    negb %al +; X86-NEXT:    movsbl %al, %esi +; X86-NEXT:    movl 36(%esp,%esi), %eax +; X86-NEXT:    movl 40(%esp,%esi), %edi +; X86-NEXT:    movl %edi, %edx +; X86-NEXT:    shldl %cl, %eax, %edx +; X86-NEXT:    movl 32(%esp,%esi), %ebx +; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 44(%esp,%esi), %esi +; X86-NEXT:    shldl %cl, %edi, %esi +; X86-NEXT:    movl %ebx, %edi +; X86-NEXT:    shll %cl, %edi +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    shldl %cl, %ebx, %eax +; X86-NEXT:    movl 8(%ebp), %ecx +; X86-NEXT:    xorl 12(%ecx), %esi +; X86-NEXT:    xorl 8(%ecx), %edx +; X86-NEXT:    xorl 4(%ecx), %eax +; X86-NEXT:    xorl (%ecx), %edi +; X86-NEXT:    movl %edx, 8(%ecx) +; X86-NEXT:    movl %esi, 12(%ecx) +; X86-NEXT:    movl %edi, (%ecx) +; X86-NEXT:    movl %eax, 4(%ecx) +; X86-NEXT:    orl %esi, %eax +; X86-NEXT:    orl %edx, %edi +; X86-NEXT:    orl %eax, %edi +; X86-NEXT:    setne %al +; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    popl %esi +; X86-NEXT:    popl %edi +; X86-NEXT:    popl %ebx +; X86-NEXT:    popl %ebp +; X86-NEXT:    retl +; +; SSE-LABEL: complement_cmpz_i128: +; SSE:       # %bb.0: +; SSE-NEXT:    movl %esi, %ecx +; SSE-NEXT:    movl $1, %eax +; SSE-NEXT:    xorl %edx, %edx +; SSE-NEXT:    shldq %cl, %rax, %rdx +; SSE-NEXT:    shlq %cl, %rax +; SSE-NEXT:    xorl %esi, %esi +; SSE-NEXT:    testb $64, %cl +; SSE-NEXT:    cmovneq %rax, %rdx +; SSE-NEXT:    cmovneq %rsi, %rax +; SSE-NEXT:    xorq 8(%rdi), %rdx +; SSE-NEXT:    xorq (%rdi), %rax +; SSE-NEXT:    movq %rax, (%rdi) +; SSE-NEXT:    movq %rdx, 8(%rdi) +; SSE-NEXT:    orq %rdx, %rax +; SSE-NEXT:    setne %al +; SSE-NEXT:    retq +; +; AVX2-LABEL: complement_cmpz_i128: +; AVX2:       # %bb.0: +; AVX2-NEXT:    movl %esi, %ecx +; AVX2-NEXT:    movl $1, %eax +; AVX2-NEXT:    xorl %edx, %edx +; AVX2-NEXT:    shldq %cl, %rax, %rdx +; AVX2-NEXT:    xorl %esi, %esi +; AVX2-NEXT:    shlxq %rcx, %rax, %rax +; AVX2-NEXT:    testb $64, %cl +; AVX2-NEXT:    cmovneq %rax, %rdx +; AVX2-NEXT:    cmovneq %rsi, %rax +; AVX2-NEXT:    xorq 8(%rdi), %rdx +; AVX2-NEXT:    xorq (%rdi), %rax +; AVX2-NEXT:    movq %rax, (%rdi) +; AVX2-NEXT:    movq %rdx, 8(%rdi) +; AVX2-NEXT:    orq %rdx, %rax +; AVX2-NEXT:    setne %al +; AVX2-NEXT:    retq +; +; AVX512-LABEL: complement_cmpz_i128: +; AVX512:       # %bb.0: +; AVX512-NEXT:    movl %esi, %ecx +; AVX512-NEXT:    xorl %eax, %eax +; AVX512-NEXT:    movl $1, %edx +; AVX512-NEXT:    xorl %esi, %esi +; AVX512-NEXT:    shldq %cl, %rdx, %rsi +; AVX512-NEXT:    shlxq %rcx, %rdx, %rdx +; AVX512-NEXT:    testb $64, %cl +; AVX512-NEXT:    cmovneq %rdx, %rsi +; AVX512-NEXT:    cmovneq %rax, %rdx +; AVX512-NEXT:    xorq 8(%rdi), %rsi +; AVX512-NEXT:    xorq (%rdi), %rdx +; AVX512-NEXT:    movq %rdx, (%rdi) +; AVX512-NEXT:    movq %rsi, 8(%rdi) +; AVX512-NEXT:    orq %rsi, %rdx +; AVX512-NEXT:    setne %al +; AVX512-NEXT:    
retq +  %rem = and i32 %position, 127 +  %ofs = zext nneg i32 %rem to i128 +  %bit = shl nuw i128 1, %ofs +  %ld = load i128, ptr %word +  %res = xor i128 %ld, %bit +  store i128 %res, ptr %word +  %cmp = icmp ne i128 %res, 0 +  ret i1 %cmp +} + +; Multiple loads in store chain +define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { +; X86-LABEL: reset_multiload_i128: +; X86:       # %bb.0: +; X86-NEXT:    pushl %ebp +; X86-NEXT:    movl %esp, %ebp +; X86-NEXT:    pushl %ebx +; X86-NEXT:    pushl %edi +; X86-NEXT:    pushl %esi +; X86-NEXT:    andl $-16, %esp +; X86-NEXT:    subl $64, %esp +; X86-NEXT:    movl 12(%ebp), %ecx +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $1, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) +; X86-NEXT:    movl %ecx, %eax +; X86-NEXT:    shrb $3, %al +; X86-NEXT:    andb $12, %al +; X86-NEXT:    negb %al +; X86-NEXT:    movsbl %al, %edi +; X86-NEXT:    movl 36(%esp,%edi), %edx +; X86-NEXT:    movl 40(%esp,%edi), %ebx +; X86-NEXT:    movl %ebx, %esi +; X86-NEXT:    shldl %cl, %edx, %esi +; X86-NEXT:    movl 32(%esp,%edi), %eax +; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 44(%esp,%edi), %edi +; X86-NEXT:    shldl %cl, %ebx, %edi +; X86-NEXT:    movl %eax, %ebx +; X86-NEXT:    # kill: def $cl killed $cl killed $ecx +; X86-NEXT:    shll %cl, %ebx +; X86-NEXT:    notl %ebx +; X86-NEXT:    movl 16(%ebp), %eax +; X86-NEXT:    movl (%eax), %eax +; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT:    movl 12(%ebp), %eax +; X86-NEXT:    andl $96, %eax +; X86-NEXT:    shrl $3, %eax +; X86-NEXT:    movl 8(%ebp), %ecx +; X86-NEXT:    movl (%ecx,%eax), %eax +; X86-NEXT:    andl %ebx, (%ecx) +; X86-NEXT:    movl 12(%ebp), %ecx +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT:    shldl %cl, %ebx, %edx +; X86-NEXT:    notl %edx +; X86-NEXT:    movl 8(%ebp), %ebx +; X86-NEXT:    andl %edx, 4(%ebx) +; X86-NEXT:    notl %esi +; X86-NEXT:    andl %esi, 8(%ebx) +; X86-NEXT:    notl %edi +; X86-NEXT:    andl %edi, 12(%ebx) +; X86-NEXT:    btl %ecx, %eax +; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT:    jae .LBB22_2 +; X86-NEXT:  # %bb.1: +; X86-NEXT:    xorl %eax, %eax +; X86-NEXT:  .LBB22_2: +; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    popl %esi +; X86-NEXT:    popl %edi +; X86-NEXT:    popl %ebx +; X86-NEXT:    popl %ebp +; X86-NEXT:    retl +; +; SSE-LABEL: reset_multiload_i128: +; SSE:       # %bb.0: +; SSE-NEXT:    movl %esi, %ecx +; SSE-NEXT:    movl $1, %esi +; SSE-NEXT:    xorl %r8d, %r8d +; SSE-NEXT:    shldq %cl, %rsi, %r8 +; SSE-NEXT:    xorl %eax, %eax +; SSE-NEXT:    shlq %cl, %rsi +; SSE-NEXT:    testb $64, %cl +; SSE-NEXT:    cmovneq %rsi, %r8 +; SSE-NEXT:    cmovneq %rax, %rsi +; SSE-NEXT:    notq %r8 +; SSE-NEXT:    notq %rsi +; SSE-NEXT:    movl %ecx, %r9d +; SSE-NEXT:    andl $96, %r9d +; SSE-NEXT:    shrl $3, %r9d +; SSE-NEXT:    movl (%rdi,%r9), %r9d +; SSE-NEXT:    btl %ecx, %r9d +; SSE-NEXT:    jb .LBB22_2 +; SSE-NEXT:  # %bb.1: +; SSE-NEXT:    movl (%rdx), %eax +; SSE-NEXT:  .LBB22_2: +; SSE-NEXT:    andq %r8, 8(%rdi) +; SSE-NEXT:    andq %rsi, (%rdi) +; SSE-NEXT:    # kill: def $eax killed $eax killed $rax +; SSE-NEXT:    retq +; +; AVX2-LABEL: 
reset_multiload_i128: +; AVX2:       # %bb.0: +; AVX2-NEXT:    movl %esi, %ecx +; AVX2-NEXT:    xorl %eax, %eax +; AVX2-NEXT:    movl $1, %r8d +; AVX2-NEXT:    xorl %esi, %esi +; AVX2-NEXT:    shldq %cl, %r8, %rsi +; AVX2-NEXT:    shlxq %rcx, %r8, %r8 +; AVX2-NEXT:    testb $64, %cl +; AVX2-NEXT:    cmovneq %r8, %rsi +; AVX2-NEXT:    cmovneq %rax, %r8 +; AVX2-NEXT:    notq %rsi +; AVX2-NEXT:    notq %r8 +; AVX2-NEXT:    movl %ecx, %r9d +; AVX2-NEXT:    andl $96, %r9d +; AVX2-NEXT:    shrl $3, %r9d +; AVX2-NEXT:    movl (%rdi,%r9), %r9d +; AVX2-NEXT:    btl %ecx, %r9d +; AVX2-NEXT:    jb .LBB22_2 +; AVX2-NEXT:  # %bb.1: +; AVX2-NEXT:    movl (%rdx), %eax +; AVX2-NEXT:  .LBB22_2: +; AVX2-NEXT:    andq %rsi, 8(%rdi) +; AVX2-NEXT:    andq %r8, (%rdi) +; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax +; AVX2-NEXT:    retq +; +; AVX512-LABEL: reset_multiload_i128: +; AVX512:       # %bb.0: +; AVX512-NEXT:    movl %esi, %ecx +; AVX512-NEXT:    movl $1, %r8d +; AVX512-NEXT:    xorl %esi, %esi +; AVX512-NEXT:    shldq %cl, %r8, %rsi +; AVX512-NEXT:    xorl %eax, %eax +; AVX512-NEXT:    shlxq %rcx, %r8, %r8 +; AVX512-NEXT:    testb $64, %cl +; AVX512-NEXT:    cmovneq %r8, %rsi +; AVX512-NEXT:    cmovneq %rax, %r8 +; AVX512-NEXT:    notq %rsi +; AVX512-NEXT:    notq %r8 +; AVX512-NEXT:    movl %ecx, %r9d +; AVX512-NEXT:    andl $96, %r9d +; AVX512-NEXT:    shrl $3, %r9d +; AVX512-NEXT:    movl (%rdi,%r9), %r9d +; AVX512-NEXT:    btl %ecx, %r9d +; AVX512-NEXT:    jb .LBB22_2 +; AVX512-NEXT:  # %bb.1: +; AVX512-NEXT:    movl (%rdx), %eax +; AVX512-NEXT:  .LBB22_2: +; AVX512-NEXT:    andq %rsi, 8(%rdi) +; AVX512-NEXT:    andq %r8, (%rdi) +; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax +; AVX512-NEXT:    retq +  %rem = and i32 %position, 127 +  %ofs = zext nneg i32 %rem to i128 +  %bit = shl nuw i128 1, %ofs +  %mask = xor i128 %bit, -1 +  %ld = load i128, ptr %word +  %sel = load i32, ptr %p +  %test = and i128 %ld, %bit +  %res = and i128 %ld, %mask +  %cmp = icmp eq i128 %test, 0 +  store i128 %res, ptr %word +  %ret = select i1 %cmp, i32 %sel, i32 0 +  ret i32 %ret +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 7f34513..68cfc65 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -660,6 +660,114 @@ exit:    ret i32 %red  } + +define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { +; CHECK-LABEL: define i32 @test_or_reduction_with_stride_2( +; CHECK-SAME: i32 [[SCALE:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT:  [[ENTRY:.*:]] +; CHECK-NEXT:    br label %[[VECTOR_PH:.*]] +; CHECK:       [[VECTOR_PH]]: +; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[SCALE]], i64 0 +; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]] +; CHECK:       [[VECTOR_BODY]]: +; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; 
CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 +; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 +; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18 +; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20 +; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22 +; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 +; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 +; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 +; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]] +; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP4]] +; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP5]] +; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP6]] +; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP7]] +; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP8]] +; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP9]] +; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP10]] +; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP11]] +; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]] +; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]] +; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]] +; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]] +; CHECK-NEXT:    [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1 +; CHECK-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1 +; CHECK-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT:    [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1 +; CHECK-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1 +; CHECK-NEXT:    [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1 +; CHECK-NEXT:    [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1 +; CHECK-NEXT:    [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1 +; CHECK-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1 +; CHECK-NEXT:    [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1 +; CHECK-NEXT:    [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1 +; CHECK-NEXT:    [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1 +; CHECK-NEXT:    [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1 +; CHECK-NEXT:    [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP31]], align 1 +; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0 +; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1 +; CHECK-NEXT:    [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2 +; CHECK-NEXT:    
[[TMP51:%.*]] = insertelement <16 x i8> [[TMP50]], i8 [[TMP35]], i32 3 +; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP36]], i32 4 +; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <16 x i8> [[TMP52]], i8 [[TMP37]], i32 5 +; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <16 x i8> [[TMP53]], i8 [[TMP38]], i32 6 +; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <16 x i8> [[TMP54]], i8 [[TMP39]], i32 7 +; CHECK-NEXT:    [[TMP56:%.*]] = insertelement <16 x i8> [[TMP55]], i8 [[TMP40]], i32 8 +; CHECK-NEXT:    [[TMP57:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP41]], i32 9 +; CHECK-NEXT:    [[TMP58:%.*]] = insertelement <16 x i8> [[TMP57]], i8 [[TMP42]], i32 10 +; CHECK-NEXT:    [[TMP59:%.*]] = insertelement <16 x i8> [[TMP58]], i8 [[TMP43]], i32 11 +; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <16 x i8> [[TMP59]], i8 [[TMP44]], i32 12 +; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <16 x i8> [[TMP60]], i8 [[TMP45]], i32 13 +; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP46]], i32 14 +; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <16 x i8> [[TMP62]], i8 [[TMP47]], i32 15 +; CHECK-NEXT:    [[TMP64:%.*]] = sext <16 x i8> [[TMP63]] to <16 x i32> +; CHECK-NEXT:    [[TMP65:%.*]] = mul <16 x i32> [[BROADCAST_SPLAT]], [[TMP64]] +; CHECK-NEXT:    [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]] +; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT:    [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48 +; CHECK-NEXT:    br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK:       [[MIDDLE_BLOCK]]: +; CHECK-NEXT:    [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]]) +; CHECK-NEXT:    br label %[[SCALAR_PH:.*]] +; CHECK:       [[SCALAR_PH]]: +; +entry: +  br label %loop + +loop: +  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +  %reduction = phi i32 [ %reduction.next, %loop ], [ 0, %entry ] +  %gep = getelementptr [32 x i8], ptr %src, i64 %iv +  %load = load i8, ptr %gep, align 1 +  %sext = sext i8 %load to i32 +  %mul = mul i32 %scale, %sext +  %reduction.next = or i32 %mul, %reduction +  %iv.next = add i64 %iv, 2 +  %cmp = icmp eq i64 %iv.next, 100 +  br i1 %cmp, label %exit, label %loop + +exit: +  ret i32 %reduction.next +} +  attributes #0 = { "target-cpu"="neoverse-512tvb" }  !0 = !{!1, !2, i64 0} diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll index d4004da..8081c0e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll @@ -64,39 +64,24 @@ exit:  define void @uniform_load_can_fold_users(ptr noalias %src, ptr %dst, i64 %start, double %d) {  ; CHECK-LABEL: define void @uniform_load_can_fold_users(  ; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[START:%.*]], double [[D:%.*]]) { -; CHECK-NEXT:  [[ENTRY:.*:]] -; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[START]], 1 -; CHECK-NEXT:    [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 0) -; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]] -; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2 -; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] -; CHECK:       [[VECTOR_PH]]: -; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2 -; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]] -; 
CHECK-NEXT:    br label %[[VECTOR_BODY:.*]] -; CHECK:       [[VECTOR_BODY]]: -; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT:  [[ENTRY:.*]]: +; CHECK-NEXT:    br label %[[LOOP:.*]] +; CHECK:       [[LOOP]]: +; CHECK-NEXT:    [[TMP4:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT:    [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]  ; CHECK-NEXT:    [[TMP5:%.*]] = load double, ptr [[SRC]], align 8 -; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 -; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], splat (double 9.000000e+00) -; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT:    [[TMP7:%.*]] = fmul double [[TMP5]], 9.000000e+00  ; CHECK-NEXT:    [[TMP8:%.*]] = fdiv double [[TMP7]], [[D]] -; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP3]], 1  ; CHECK-NEXT:    [[TMP10:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP3]]  ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]] -; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 [[TMP9]]  ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, ptr [[TMP12]], i64 [[TMP10]] -; CHECK-NEXT:    store double [[TMP8]], ptr [[TMP13]], align 8  ; CHECK-NEXT:    store double [[TMP8]], ptr [[TMP14]], align 8 -; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK:       [[MIDDLE_BLOCK]]: +; CHECK-NEXT:    [[IV_1_NEXT]] = add i64 [[TMP4]], 1 +; CHECK-NEXT:    [[IV_2_NEXT]] = add i64 [[IV_2]], -1 +; CHECK-NEXT:    [[EC:%.*]] = icmp sgt i64 [[IV_2]], 0 +; CHECK-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK:       [[EXIT]]: +; CHECK-NEXT:    ret void  ;  entry:    br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll index 9a69982..70adac2 100644 --- a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll +++ b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll @@ -84,12 +84,8 @@ define void @single_scalar_cast_stored(ptr %src, ptr %dst, i32 %n) {  ; CHECK:       [[VECTOR_BODY]]:  ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]  ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2, !alias.scope [[META4:![0-9]+]] -; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i16> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i16> [[BROADCAST_SPLAT]], splat (i16 15) -; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i16> 
[[TMP2]], i32 0 +; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i16 [[TMP0]], 0 +; CHECK-NEXT:    [[TMP4:%.*]] = and i16 [[TMP0]], 15  ; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP4]]  ; CHECK-NEXT:    store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]  ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll new file mode 100644 index 0000000..4a539d7 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes="simplifycfg<switch-range-to-icmp>" < %s | FileCheck %s + +; Make sure there's no use after free when removing incoming values from PHI nodes + +define i32 @pr165301(i1 %cond) { +; CHECK-LABEL: define i32 @pr165301( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT:  [[ENTRY:.*:]] +; CHECK-NEXT:    br label %[[SWITCHBB:.*]] +; CHECK:       [[SWITCHBB]]: +; CHECK-NEXT:    br label %[[SWITCHBB]] +; +entry: +  br label %switchbb + +switchbb: +  switch i1 %cond, label %default [ +  i1 false, label %switchbb +  i1 true, label %switchbb +  ] + +default: +  %phi.lcssa = phi i32 [ 0, %switchbb ] +  ret i32 %phi.lcssa +} diff --git a/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s b/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s new file mode 100644 index 0000000..6c38791 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s @@ -0,0 +1,126 @@ +# Checks that we correctly display the DW_AT_APPLE_property_name of a +# referenced DW_TAG_APPLE_property. +# +# RUN: llvm-mc -triple=aarch64--darwin -filetype=obj -o %t.o < %s +# RUN: not llvm-dwarfdump %t.o 2> %t.errs.txt | FileCheck %s +# RUN: FileCheck %s --check-prefix=ERRORS < %t.errs.txt  + +# CHECK: 0x[[PROP_REF:[0-9a-f]+]]: DW_TAG_APPLE_property +# CHECK-NEXT: DW_AT_APPLE_property_name ("autoSynthProp") +# +# CHECK: 0x[[NO_NAME_PROP:[0-9a-f]+]]: DW_TAG_APPLE_property +# CHECK-NOT: DW_AT_APPLE_property_name +# +# CHECK: 0x[[INVALID_STRP:[0-9a-f]+]]: DW_TAG_APPLE_property +# CHECK-NEXT: DW_AT_APPLE_property_name +# +# CHECK: DW_TAG_member +# CHECK:   DW_AT_APPLE_property  (0x[[PROP_REF]] "autoSynthProp") +# CHECK:   DW_AT_APPLE_property  (0x[[NO_NAME_PROP]] "") +# CHECK:   DW_AT_APPLE_property  (0x{{.*}}) +# CHECK:   DW_AT_APPLE_property  (0x{{.*}}) +# CHECK:   DW_AT_APPLE_property  (0x[[INVALID_STRP]]) + +# ERRORS: error: decoding DW_AT_APPLE_property_name: not referencing a DW_TAG_APPLE_property +# ERRORS: error: decoding DW_AT_APPLE_property_name: invalid DIE +# ERRORS: error: decoding DW_AT_APPLE_property_name: DW_FORM_strp offset 102 is beyond .debug_str bounds + +	.section	__DWARF,__debug_abbrev,regular,debug +Lsection_abbrev: +	.byte	1                               ; Abbreviation Code +	.byte	17                              ; DW_TAG_compile_unit +	.byte	1                               ; DW_CHILDREN_yes +	.byte	114                             ; DW_AT_str_offsets_base +	.byte	23                              ; DW_FORM_sec_offset +	.byte	0                               ; EOM(1) +	.byte	0                               ; EOM(2) +	.byte	2                               ; Abbreviation Code +	.byte	19                              ; DW_TAG_structure_type +	.byte	1                               ; DW_CHILDREN_yes +	.byte	3                               ; DW_AT_name +	.byte	37                      
        ; DW_FORM_strx1 +	.byte	0                               ; EOM(1) +	.byte	0                               ; EOM(2) +	.byte	3                               ; Abbreviation Code +	.ascii	"\200\204\001"                  ; DW_TAG_APPLE_property +	.byte	0                               ; DW_CHILDREN_no +	.ascii	"\350\177"                      ; DW_AT_APPLE_property_name +	.byte	37                              ; DW_FORM_strx1 +	.byte	0                               ; EOM(1) +	.byte	0                               ; EOM(2) +	.byte	4                               ; Abbreviation Code +	.ascii	"\200\204\001"                  ; DW_TAG_APPLE_property +	.byte	0                               ; DW_CHILDREN_no +	.byte	0                               ; EOM(1) +	.byte	0                               ; EOM(2) +	.byte	5                               ; Abbreviation Code +	.ascii	"\200\204\001"                  ; DW_TAG_APPLE_property +	.byte	0                               ; DW_CHILDREN_no +	.ascii	"\350\177"                      ; DW_AT_APPLE_property_name +	.byte	14                              ; DW_FORM_strp +	.byte	0                               ; EOM(1) +	.byte	0                               ; EOM(2) +	.byte	6                               ; Abbreviation Code +	.byte	13                              ; DW_TAG_member +	.byte	0                               ; DW_CHILDREN_no +	.byte	3                               ; DW_AT_name +	.byte	37                              ; DW_FORM_strx1 +	.ascii	"\355\177"                      ; DW_AT_APPLE_property +	.byte	19                              ; DW_FORM_ref4 +	.ascii	"\355\177"                      ; DW_AT_APPLE_property +	.byte	19                              ; DW_FORM_ref4 +	.ascii	"\355\177"                      ; DW_AT_APPLE_property +	.byte	19                              ; DW_FORM_ref4 +	.ascii	"\355\177"                      ; DW_AT_APPLE_property +	.byte	19                              ; DW_FORM_ref4 +	.ascii	"\355\177"                      ; DW_AT_APPLE_property +	.byte	19                              ; DW_FORM_ref4 +	.byte	0                               ; EOM(1) +	.byte	0                               ; EOM(2) +	.byte	0                               ; EOM(3) +	.section	__DWARF,__debug_info,regular,debug +Lsection_info: +Lcu_begin0: +Lset0 = Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit +	.long	Lset0 +Ldebug_info_start0: +	.short	5                               ; DWARF version number +	.byte	1                               ; DWARF Unit Type +	.byte	8                               ; Address Size (in bytes) +Lset1 = Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. 
Section +	.long	Lset1 +	.byte	1                               ; Abbrev [1] DW_TAG_compile_unit +Lset2 = Lstr_offsets_base0-Lsection_str_off ; DW_AT_str_offsets_base +	.long	Lset2 +	.byte	2                               ; Abbrev [2] DW_TAG_structure_type +	.byte	2                               ; DW_AT_name +	.byte	3                               ; Abbrev [3] DW_TAG_APPLE_property +	.byte	0                               ; DW_AT_APPLE_property_name +	.byte	4                               ; Abbrev [4] DW_TAG_APPLE_property +	.byte	5                               ; Abbrev [5] DW_TAG_APPLE_property +	.long	102                             ; DW_AT_APPLE_property_name +	.byte	6                               ; Abbrev [6] DW_TAG_member +	.byte	1                               ; DW_AT_name +	.long	19                              ; DW_AT_APPLE_property +	.long	21                              ; DW_AT_APPLE_property +	.long	17                              ; DW_AT_APPLE_property +	.long	0                               ; DW_AT_APPLE_property +	.long	22                              ; DW_AT_APPLE_property +	.byte	0                               ; End Of Children Mark +	.byte	0                               ; End Of Children Mark +Ldebug_info_end0: +	.section	__DWARF,__debug_str_offs,regular,debug +Lsection_str_off: +	.long	16                              ; Length of String Offsets Set +	.short	5 +	.short	0 +Lstr_offsets_base0: +	.section	__DWARF,__debug_str,regular,debug +Linfo_string: +	.asciz	"autoSynthProp"                 ; string offset=0 +	.asciz	"_var"                          ; string offset=14 +	.asciz	"Foo"                           ; string offset=19 +	.section	__DWARF,__debug_str_offs,regular,debug +	.long	0 +	.long	14 +	.long	19 diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test new file mode 100644 index 0000000..24726c34 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test @@ -0,0 +1,37 @@ +## This test checks that we output a warning when the specified version is too old to support the given features. 
+ +# RUN: yaml2obj %s -o %t +# RUN: llvm-readobj --bb-addr-map %t 2>&1 | FileCheck -DFILE=%t %s + +--- !ELF +FileHeader: +  Class: ELFCLASS64 +  Data:  ELFDATA2LSB +  Type:  ET_EXEC + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 1: version should be >= 3 for SHT_LLVM_BB_ADDR_MAP when callsite offsets feature is enabled: version = 2 feature = 32 +Sections: +  - Name: '.llvm_bb_addr_map (1)' +    Type: SHT_LLVM_BB_ADDR_MAP +    Entries: +      - Version: 2 +        Feature: 0x20 + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 2: version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when basic block hash feature is enabled: version = 3 feature = 64 + +  - Name: '.llvm_bb_addr_map (2)' +    Type: SHT_LLVM_BB_ADDR_MAP +    Entries: +      - Version: 3 +        Feature: 0x40 + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when post link cfg feature is enabled: version = 4 feature = 128 + +  - Name: '.llvm_bb_addr_map (3)' +    Type: SHT_LLVM_BB_ADDR_MAP +    Entries: +      - Version: 4 +        Feature: 0x80 diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test index 5faafd4..8e9d227 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test @@ -15,7 +15,7 @@  ## Check that a malformed section can be handled.  # RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o -# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED +# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000015 -DFILE=%t2.o --check-prefix=TRUNCATED  ## Check that missing features can be handled.  
# RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o @@ -59,17 +59,20 @@  # CHECK-NEXT:         {  # RAW-NEXT:             Frequency: 100  # PRETTY-NEXT:          Frequency: 1.0 +# CHECK-NEXT:           PostLink Frequency: 10  # CHECK-NEXT:           Successors [  # CHECK-NEXT:             {  # CHECK-NEXT:               ID: 2  # RAW-NEXT:                 Probability: 0x80000000  # PRETTY-NEXT:              Probability: 0x80000000 / 0x80000000 = 100.00% +# CHECK-NEXT:               PostLink Probability: 7  # CHECK-NEXT:             }  # CHECK-NEXT:           ]  # CHECK-NEXT:         }  # CHECK-NEXT:         {  # RAW-NEXT:             Frequency: 100  # PRETTY-NEXT:          Frequency: 1.0 +# CHECK-NEXT:           PostLink Frequency: 0  # CHECK-NEXT:           Successors [  # CHECK-NEXT:           ]  # CHECK-NEXT:         } @@ -172,8 +175,8 @@ Sections:      ShSize: [[SIZE=<none>]]      Link:   .text      Entries: -      - Version: 2 -        Feature: 0x7 +      - Version: 5 +        Feature: 0x87          BBRanges:            - BaseAddress: [[ADDR=0x11111]]              BBEntries: @@ -197,10 +200,12 @@ Sections:      PGOAnalyses:        - FuncEntryCount: 100          PGOBBEntries: -          - BBFreq:        100 +          - BBFreq:          100 +            PostLinkBBFreq: 10              Successors: -              - ID:        2 -                BrProb:    0x80000000 +              - ID:              2 +                BrProb:          0x80000000 +                PostLinkBrFreq: 7            - BBFreq:        100              Successors:    []        - FuncEntryCount: 8888 diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml index 299bf46..645507a 100644 --- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -15,7 +15,7 @@  # VALID-NEXT:     Type: SHT_LLVM_BB_ADDR_MAP  # VALID-NEXT:     Entries:  # VALID-NEXT:       - Version: 2 -# VALID-NEXT:         Feature: 0x7 +# VALID-NEXT:         Feature: 0x87  ## The 'BaseAddress' field is omitted when it's zero.  
# VALID-NEXT:         BBRanges:  # VALID-NEXT:           - BBEntries: @@ -43,17 +43,23 @@  # VALID-NEXT:     PGOAnalyses:  # VALID-NEXT:       - FuncEntryCount: 100  # VALID-NEXT:         PGOBBEntries: -# VALID-NEXT:           - BBFreq:        100 +# VALID-NEXT:           - BBFreq:           100 +# VALID-NEXT:             PostLinkBBFreq:   10  # VALID-NEXT:             Successors: -# VALID-NEXT:               - ID:        2 -# VALID-NEXT:                 BrProb:    0x80000000 -# VALID-NEXT:               - ID:        4 -# VALID-NEXT:                 BrProb:    0x80000000 -# VALID-NEXT:           - BBFreq:        50 +# VALID-NEXT:               - ID:              2 +# VALID-NEXT:                 BrProb:          0x80000000 +# VALID-NEXT:                 PostLinkBrFreq:  7 +# VALID-NEXT:               - ID:              4 +# VALID-NEXT:                 BrProb:          0x80000000 +# VALID-NEXT:                 PostLinkBrFreq:  0 +# VALID-NEXT:           - BBFreq:           50 +# VALID-NEXT:             PostLinkBBFreq:   0  # VALID-NEXT:             Successors: -# VALID-NEXT:               - ID:        4 -# VALID-NEXT:                 BrProb:    0xFFFFFFFF -# VALID-NEXT:           - BBFreq:        100 +# VALID-NEXT:               - ID:              4 +# VALID-NEXT:                 BrProb:          0xFFFFFFFF +# VALID-NEXT:                 PostLinkBrFreq:  0 +# VALID-NEXT:           - BBFreq:           100 +# VALID-NEXT:             PostLinkBBFreq:   3  # VALID-NEXT:             Successors:    []  # VALID-NEXT:         PGOBBEntries:  # VALID-NEXT:           - BBFreq:        20 @@ -69,7 +75,7 @@ Sections:      ShSize: [[SIZE=<none>]]      Entries:        - Version: 2 -        Feature: 0x7 +        Feature: 0x87          BBRanges:            - BaseAddress: 0x0              BBEntries: @@ -97,17 +103,20 @@ Sections:      PGOAnalyses:        - FuncEntryCount: 100          PGOBBEntries: -          - BBFreq:        100 +          - BBFreq:          100 +            PostLinkBBFreq:  10              Successors: -              - ID:        2 -                BrProb:    0x80000000 -              - ID:        4 -                BrProb:    0x80000000 -          - BBFreq:        50 +              - ID:              2 +                BrProb:          0x80000000 +                PostLinkBrFreq:  7 +              - ID:              4 +                BrProb:          0x80000000 +          - BBFreq:              50              Successors: -              - ID:        4 -                BrProb:    0xFFFFFFFF -          - BBFreq:        100 +              - ID:              4 +                BrProb:          0xFFFFFFFF +          - BBFreq:              100 +            PostLinkBBFreq:      3              Successors: []        - PGOBBEntries:            - BBFreq:        20 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml index a4cb572..ac9c8d4 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -6,8 +6,9 @@  # Case 4: Specify Entries.  
# CHECK:        Name: .llvm_bb_addr_map (1)  # CHECK:        SectionData ( -# CHECK-NEXT:     0000: 02072000 00000000 0000010B 010203E8 -# CHECK-NEXT:     0010: 07E80702 0CEEDDBB F70E0D91 A2C48801 +# CHECK-NEXT:     0000: 02872000 00000000 0000010B 010203E8 +# CHECK-NEXT:     0010: 07E80764 020CEEDD BBF70E28 0D91A2C4 +# CHECK-NEXT:     0020: 880100  # CHECK-NEXT:   )  # Case 7: Not including a field which is enabled in feature doesn't emit value @@ -26,12 +27,12 @@ Sections:  ## Test the following cases:  ## 1) We can produce an .llvm_bb_addr_map section from a description with -##    Entries and PGO Analysis data. +##    Entries and PGO Analysis and Post Link data.    - Name: '.llvm_bb_addr_map (1)'      Type: SHT_LLVM_BB_ADDR_MAP      Entries:        - Version: 2 -        Feature: 0x7 +        Feature: 0x87          BBRanges:            - BaseAddress: 0x0000000000000020              BBEntries: @@ -42,12 +43,14 @@ Sections:      PGOAnalyses:        - FuncEntryCount: 1000          PGOBBEntries: -          - BBFreq:        1000 +          - BBFreq:          1000 +            PostLinkBBFreq:  100              Successors: -              - ID:        12 -                BrProb:    0xeeeeeeee -              - ID:        13 -                BrProb:    0x11111111 +              - ID:               12 +                BrProb:           0xeeeeeeee +                PostLinkBrFreq:   40 +              - ID:               13 +                BrProb:           0x11111111  ## 2) According to feature we have FuncEntryCount but none is provided in yaml    - Name: '.llvm_bb_addr_map (2)' @@ -66,7 +69,7 @@ Sections:  ## Check that yaml2obj generates a warning when we use unsupported feature.  # RUN: yaml2obj --docnum=2  %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE -# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xf0 +# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0x100  --- !ELF  FileHeader: @@ -79,4 +82,4 @@ Sections:      Entries:        - Version: 2  ##  Specify unsupported feature -        Feature: 0xF0 +        Feature: 0x100 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml index 339e419..05d77d6 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml @@ -220,7 +220,7 @@ Sections:  ## Check that yaml2obj generates a warning when we use unsupported versions.  # RUN: yaml2obj --docnum=3  %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION -# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version +# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 6; encoding using the most recent version  --- !ELF  FileHeader: @@ -232,4 +232,4 @@ Sections:      Type: SHT_LLVM_BB_ADDR_MAP      Entries:  ##  Specify unsupported version -      - Version: 5 +      - Version: 6 | 
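The yaml2obj, obj2yaml and llvm-readobj test updates above all revolve around the new post-link PGO data in SHT_LLVM_BB_ADDR_MAP: feature bit 0x80 and the PostLinkBBFreq / PostLinkBrFreq fields, with the readobj warning test indicating the bit requires version >= 5. As a minimal sketch of a standalone yaml2obj input exercising those fields: the field names, the 0x87 feature value, and the version requirement are taken from the tests above, while the section name, BB entry values, and counters below are made up purely for illustration.

--- !ELF
FileHeader:
  Class: ELFCLASS64
  Data:  ELFDATA2LSB
  Type:  ET_EXEC
Sections:
  - Name: .llvm_bb_addr_map
    Type: SHT_LLVM_BB_ADDR_MAP
    Entries:
      - Version: 5        # per the warning test, post-link cfg data wants version >= 5
        Feature: 0x87     # 0x7 as in the existing PGO analysis map tests, plus the new 0x80 post-link bit
        BBRanges:
          - BaseAddress: 0x20
            BBEntries:
              - ID:            11          # illustrative basic-block entry
                AddressOffset: 0x1
                Size:          0x2
                Metadata:      0x3
    PGOAnalyses:
      - FuncEntryCount: 1000
        PGOBBEntries:
          - BBFreq:         1000
            PostLinkBBFreq: 100            # new post-link block frequency
            Successors:
              - ID:             12
                BrProb:         0xEEEEEEEE
                PostLinkBrFreq: 40         # new post-link branch frequency

Feeding a file like this through yaml2obj and then llvm-readobj --bb-addr-map should, under these assumptions, print the PostLink Frequency / PostLink Probability lines shown in the updated readobj check lines above.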
