Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir | 47
-rw-r--r--  llvm/test/CodeGen/AArch64/llvm.sincos.ll | 195
-rw-r--r--  llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll | 18
-rw-r--r--  llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll | 8
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll | 308
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll | 146
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll | 379
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll | 258
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll | 212
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll | 146
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll | 379
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll | 156
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll | 1575
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll | 139
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll | 1724
-rw-r--r--  llvm/test/CodeGen/RISCV/features-info.ll | 1
-rw-r--r--  llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll | 703
-rw-r--r--  llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll | 39
-rw-r--r--  llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll | 14
-rw-r--r--  llvm/test/CodeGen/X86/bittest-big-integer.ll | 288
-rw-r--r--  llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll | 108
-rw-r--r--  llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll | 39
-rw-r--r--  llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll | 8
-rw-r--r--  llvm/test/Transforms/SimplifyCFG/pr165301.ll | 26
-rw-r--r--  llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s | 126
-rw-r--r--  llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test | 37
-rw-r--r--  llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test | 17
-rw-r--r--  llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml | 49
-rw-r--r--  llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml | 25
-rw-r--r--  llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml | 4
30 files changed, 6580 insertions, 594 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index 6b84a84..1950e60 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -1440,3 +1440,50 @@ body: |
%freeze:_(<4 x s32>) = G_FREEZE %extract
$q0 = COPY %freeze(<4 x s32>)
RET_ReallyLR implicit $x0
+...
+---
+name: ubfx_does_not_generate_poison
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: ubfx_does_not_generate_poison
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c1:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[FREEZE]], %c1(s64), %c1
+ ; CHECK-NEXT: $x0 = COPY [[UBFX]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 1
+ %1:_(s64) = G_UBFX %0, %c1, %c1
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: sbfx_does_not_generate_poison
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: sbfx_does_not_generate_poison
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %c1:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[SBFX:%[0-9]+]]:_(s64) = G_SBFX [[FREEZE]], %c1(s64), %c1
+ ; CHECK-NEXT: $x0 = COPY [[SBFX]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 1
+ %1:_(s64) = G_SBFX %0, %c1, %c1
+ %2:_(s64) = G_FREEZE %1
+ $x0 = COPY %2(s64)
+ RET_ReallyLR implicit $x0
+
+...
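In compact form, the two new MIR tests above check that the AArch64 GlobalISel combiner hoists the freeze onto the extract's input, on the premise encoded in the CHECK lines that G_UBFX and G_SBFX do not themselves introduce poison:

; tested input
%1:_(s64) = G_UBFX %0, %c1, %c1
%2:_(s64) = G_FREEZE %1
; expected output (per the CHECK lines)
[[FREEZE]]:_(s64) = G_FREEZE [[COPY]]
[[UBFX]]:_(s64) = G_UBFX [[FREEZE]], %c1(s64), %c1

The G_SBFX case is checked the same way.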
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index f1dcb2a..21da864 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -215,6 +215,133 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) nounwind {
ret { <2 x half>, <2 x half> } %result
}
+define { <3 x half>, <3 x half> } @test_sincos_v3f16(<3 x half> %a) nounwind {
+; CHECK-LABEL: test_sincos_v3f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s2, s0, [sp, #32]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ldp s3, s1, [sp, #24]
+; CHECK-NEXT: fcvt h4, s0
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: fcvt h1, s3
+; CHECK-NEXT: ldp s5, s3, [sp, #40]
+; CHECK-NEXT: fcvt h3, s3
+; CHECK-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-NEXT: fcvt h4, s5
+; CHECK-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-NEXT: ldp s5, s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h3, s5
+; CHECK-NEXT: mov v1.h[2], v4.h[0]
+; CHECK-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-NEXT: mov v1.h[3], v3.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v3f16:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #80
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s8, h1
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s9, h1
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s10, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s11, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: fcvt h2, s1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v2.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT: add sp, sp, #80
+; NO-LIBCALL-NEXT: ret
+ %result = call { <3 x half>, <3 x half> } @llvm.sincos.v3f16(<3 x half> %a)
+ ret { <3 x half>, <3 x half> } %result
+}
+
define { float, float } @test_sincos_f32(float %a) nounwind {
; CHECK-LABEL: test_sincos_f32:
; CHECK: // %bb.0:
@@ -493,3 +620,71 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) nounwi
%result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
ret { <2 x double>, <2 x double> } %result
}
+
+define { <3 x double>, <3 x double> } @test_sincos_v3f64(<3 x double> %a) nounwind {
+; CHECK-LABEL: test_sincos_v3f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: add x0, sp, #16
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: fmov d8, d2
+; CHECK-NEXT: fmov d9, d1
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: fmov d0, d9
+; CHECK-NEXT: add x0, sp, #32
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: fmov d0, d8
+; CHECK-NEXT: add x0, sp, #72
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldp d3, d0, [sp, #8]
+; CHECK-NEXT: ldr d2, [sp, #72]
+; CHECK-NEXT: ldp d4, d1, [sp, #24]
+; CHECK-NEXT: ldr d5, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v3f64:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d13, d12, [sp, #-64]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d10, d0
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d8, d2
+; NO-LIBCALL-NEXT: fmov d9, d1
+; NO-LIBCALL-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d11, d0
+; NO-LIBCALL-NEXT: fmov d0, d9
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d12, d0
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d13, d0
+; NO-LIBCALL-NEXT: fmov d0, d10
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d10, d0
+; NO-LIBCALL-NEXT: fmov d0, d9
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d9, d0
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d5, d0
+; NO-LIBCALL-NEXT: fmov d0, d11
+; NO-LIBCALL-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov d3, d10
+; NO-LIBCALL-NEXT: fmov d4, d9
+; NO-LIBCALL-NEXT: fmov d1, d12
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov d2, d13
+; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d13, d12, [sp], #64 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
+ %result = call { <3 x double>, <3 x double> } @llvm.sincos.v3f64(<3 x double> %a)
+ ret { <3 x double>, <3 x double> } %result
+}
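The new v3f16 and v3f64 cases follow the same shape as the existing scalar and v2 tests: with the libcall available, each element becomes one sincosf/sincos call that writes both results through the two pointer arguments (the stack slots passed in x0/x1 above), while the NO-LIBCALL configuration expands to separate sinf/cosf (or sin/cos) calls per element. A minimal scalar usage sketch of the intrinsic these tests exercise (illustrative only; the overload name follows the same mangling as the vector calls above):

declare { double, double } @llvm.sincos.f64(double)

define { double, double } @sincos_once(double %x) nounwind {
  ; returns sin(%x) and cos(%x) as one struct, like the vector tests do per element
  %r = call { double, double } @llvm.sincos.f64(double %x)
  ret { double, double } %r
}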
diff --git a/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll
new file mode 100644
index 0000000..1491729
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1-HVX.ll
@@ -0,0 +1,18 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
+
+define void @f5(<64 x i32> %a0, ptr %a1) {
+; CHECK-LABEL: f5:
+; CHECK: [[REG0:(r[0-9]+)]] = ##16843009
+; CHECK-DAG: q[[Q0:[0-9]+]] = vand(v{{[0-9]+}},[[REG0]])
+; CHECK-DAG: q[[Q1:[0-9]+]] = vand(v{{[0-9]+}},[[REG0]])
+; CHECK: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
+; CHECK: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
+; CHECK: v[[VROR:[0-9]+]] = vror(v{{[0-9]+}},r{{[0-9]+}})
+; CHECK: v[[VOR:[0-9]+]] = vor(v[[VROR]],v{{[0-9]+}})
+; CHECK: q{{[0-9]+}} = vand(v[[VOR]],r{{[0-9]+}})
+b0:
+ %v0 = trunc <64 x i32> %a0 to <64 x i1>
+ store <64 x i1> %v0, ptr %a1, align 1
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
index 93fcd42..e02a2e7 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
@@ -12,8 +12,8 @@ define float @flog2_s(float %x) nounwind {
;
; LA64-LABEL: flog2_s:
; LA64: # %bb.0:
-; LA64-NEXT: pcaddu18i $t8, %call36(log2f)
-; LA64-NEXT: jr $t8
+; LA64-NEXT: flogb.s $fa0, $fa0
+; LA64-NEXT: ret
%y = call float @llvm.log2.f32(float %x)
ret float %y
}
@@ -25,8 +25,8 @@ define double @flog2_d(double %x) nounwind {
;
; LA64-LABEL: flog2_d:
; LA64: # %bb.0:
-; LA64-NEXT: pcaddu18i $t8, %call36(log2)
-; LA64-NEXT: jr $t8
+; LA64-NEXT: flogb.d $fa0, $fa0
+; LA64-NEXT: ret
%y = call double @llvm.log2.f64(double %x)
ret double %y
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
new file mode 100644
index 0000000..79407c3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
@@ -0,0 +1,308 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; ceilf
+define void @ceil_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrp.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrp.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.ceil.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; ceil
+define void @ceil_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrp.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.ceil.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; floorf
+define void @floor_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrm.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrm.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.floor.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; floor
+define void @floor_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrm.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.floor.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; truncf
+define void @trunc_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrz.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrz.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.trunc.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; trunc
+define void @trunc_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrz.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.trunc.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; roundevenf
+define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrne.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrne.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; roundeven
+define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrne.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+declare <8 x float> @llvm.floor.v8f32(<8 x float>)
+declare <4 x double> @llvm.floor.v4f64(<4 x double>)
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
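For orientation, the four rounding flavours in this new file map directly onto the per-element rounding instructions visible in the checks, and the 256-bit vectors are currently scalarized element by element with xvpickve/vextrins/xvpermi.q rather than rounded by a single full-width instruction. A summary of the mapping, read off the CHECK lines rather than added test content:

; llvm.ceil.*      -> vfrintrp.{s,d}   (round toward +infinity)
; llvm.floor.*     -> vfrintrm.{s,d}   (round toward -infinity)
; llvm.trunc.*     -> vfrintrz.{s,d}   (round toward zero)
; llvm.roundeven.* -> vfrintrne.{s,d}  (round to nearest, ties to even)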
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
index 2a5a8fa..5c5c199 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -1,14 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: xvavg_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -25,8 +24,7 @@ define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -43,8 +41,7 @@ define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,14 +54,22 @@ entry:
}
define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavg_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -79,8 +84,7 @@ define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -97,8 +101,7 @@ define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -115,8 +118,7 @@ define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -129,14 +131,22 @@ entry:
}
define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavg_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -151,9 +161,7 @@ define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -171,9 +179,7 @@ define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -191,9 +197,7 @@ define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -207,15 +211,23 @@ entry:
}
define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavgr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -231,9 +243,7 @@ define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -251,9 +261,7 @@ define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -271,9 +279,7 @@ define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -287,15 +293,23 @@ entry:
}
define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavgr_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
new file mode 100644
index 0000000..c82adcb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
@@ -0,0 +1,379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = sext <32 x i8> %va to <32 x i16>
+ %eb = sext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = sext <16 x i16> %va to <16 x i32>
+ %eb = sext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = sext <8 x i32> %va to <8 x i64>
+ %eb = sext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = sext <4 x i64> %va to <4 x i128>
+ %eb = sext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = zext <32 x i8> %va to <32 x i16>
+ %eb = zext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = zext <16 x i16> %va to <16 x i32>
+ %eb = zext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = zext <8 x i32> %va to <8 x i64>
+ %eb = zext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = zext <4 x i64> %va to <4 x i128>
+ %eb = zext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = sext <32 x i8> %va to <32 x i16>
+ %eb = sext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = sext <16 x i16> %va to <16 x i32>
+ %eb = sext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = sext <8 x i32> %va to <8 x i64>
+ %eb = sext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = sext <4 x i64> %va to <4 x i128>
+ %eb = sext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = zext <32 x i8> %va to <32 x i16>
+ %eb = zext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = zext <16 x i16> %va to <16 x i32>
+ %eb = zext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = zext <8 x i32> %va to <8 x i64>
+ %eb = zext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = zext <4 x i64> %va to <4 x i128>
+ %eb = zext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
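The expected code throughout this new file relies on the overflow-free averaging identities instead of the widen/add/shift/truncate sequence written in the IR: the floor average (the xvavg_* tests) becomes (a & b) + ((a ^ b) >> 1) and the rounding average (the xvavgr_* tests) becomes (a | b) - ((a ^ b) >> 1), using an arithmetic shift for the signed variants and a logical shift for the unsigned ones. A minimal scalar illustration of the signed floor form (for exposition; not part of the test file):

define i8 @avg_floor_i8(i8 %a, i8 %b) {
  ; a + b == 2*(a & b) + (a ^ b), so the average can be formed without widening
  %and = and i8 %a, %b
  %xor = xor i8 %a, %b
  %shr = ashr i8 %xor, 1
  %avg = add i8 %and, %shr
  ret i8 %avg
}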
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
index 68f2e3a..6b5f575 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
@@ -1,166 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
declare <8 x float> @llvm.log2.v8f32(<8 x float>)
declare <4 x double> @llvm.log2.v4f64(<4 x double>)
define void @flog2_v8f32(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v8f32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -128
-; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 5
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 4
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 1
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 0
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 2
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 3
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: xvst $xr1, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 128
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v8f32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -128
-; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 5
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 4
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 1
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 2
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 3
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: xvst $xr1, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 128
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvflogb.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <8 x float>, ptr %a
%r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v)
@@ -169,93 +20,12 @@ entry:
}
define void @flog2_v4f64(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v4f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -112
-; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 3
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 1
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 0
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: xvst $xr0, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 112
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v4f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -112
-; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 3
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 2
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 1
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: xvst $xr0, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 112
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvflogb.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <4 x double>, ptr %a
%r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll
new file mode 100644
index 0000000..1ca6290
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; ceilf
+define void @ceil_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrp.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
+; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.ceil.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; ceil
+define void @ceil_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.d $vr1, $vr1
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.ceil.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+;; floorf
+define void @floor_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrm.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
+; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.floor.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; floor
+define void @floor_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.d $vr1, $vr1
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.floor.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+;; truncf
+define void @trunc_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrz.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
+; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.trunc.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; trunc
+define void @trunc_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.d $vr1, $vr1
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.trunc.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+;; roundevenf
+define void @roundeven_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrne.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
+; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %r = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %v0)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+;; roundeven
+define void @roundeven_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.d $vr1, $vr1
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %r = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %v0)
+ store <2 x double> %r, ptr %res
+ ret void
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
index 20b88984..334af22 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
@@ -1,14 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vavg_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -25,8 +24,7 @@ define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -43,8 +41,7 @@ define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,14 +54,22 @@ entry:
}
define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavg_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrai.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavg.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -79,8 +84,7 @@ define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -97,8 +101,7 @@ define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -115,8 +118,7 @@ define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -129,14 +131,22 @@ entry:
}
define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavg_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavg.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -151,9 +161,7 @@ define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -171,9 +179,7 @@ define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -191,9 +197,7 @@ define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -207,15 +211,23 @@ entry:
}
define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavgr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.du $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vaddi.du $vr0, $vr0, 1
+; LA32-NEXT: vsrai.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavgr.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -231,9 +243,7 @@ define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -251,9 +261,7 @@ define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -271,9 +279,7 @@ define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -287,15 +293,23 @@ entry:
}
define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavgr_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.du $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vaddi.du $vr0, $vr0, 1
+; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavgr.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll
new file mode 100644
index 0000000..bb4df64
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avgfloor-ceil.ll
@@ -0,0 +1,379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vadd.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = sext <16 x i8> %va to <16 x i16>
+ %eb = sext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vadd.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = sext <8 x i16> %va to <8 x i32>
+ %eb = sext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vadd.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = sext <4 x i32> %va to <4 x i64>
+ %eb = sext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
+; CHECK-NEXT: vadd.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = sext <2 x i64> %va to <2 x i128>
+ %eb = sext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %shr = lshr <2 x i128> %add, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
+
+define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vadd.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = zext <16 x i8> %va to <16 x i16>
+ %eb = zext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vadd.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = zext <8 x i16> %va to <8 x i32>
+ %eb = zext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vadd.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = zext <4 x i32> %va to <4 x i64>
+ %eb = zext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vand.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
+; CHECK-NEXT: vadd.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = zext <2 x i64> %va to <2 x i128>
+ %eb = zext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %shr = lshr <2 x i128> %add, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vsub.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = sext <16 x i8> %va to <16 x i16>
+ %eb = sext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vsub.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = sext <8 x i16> %va to <8 x i32>
+ %eb = sext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vsub.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = sext <4 x i32> %va to <4 x i64>
+ %eb = sext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
+; CHECK-NEXT: vsub.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = sext <2 x i64> %va to <2 x i128>
+ %eb = sext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %add1 = add <2 x i128> %add, <i128 1, i128 1>
+ %shr = lshr <2 x i128> %add1, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vsub.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %vb = load <16 x i8>, ptr %b
+ %ea = zext <16 x i8> %va to <16 x i16>
+ %eb = zext <16 x i8> %vb to <16 x i16>
+ %add = add <16 x i16> %ea, %eb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <16 x i16> %shr to <16 x i8>
+ store <16 x i8> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vsub.h $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i16>, ptr %a
+ %vb = load <8 x i16>, ptr %b
+ %ea = zext <8 x i16> %va to <8 x i32>
+ %eb = zext <8 x i16> %vb to <8 x i32>
+ %add = add <8 x i32> %ea, %eb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <8 x i32> %shr to <8 x i16>
+ store <8 x i16> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vsub.w $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i32>, ptr %a
+ %vb = load <4 x i32>, ptr %b
+ %ea = zext <4 x i32> %va to <4 x i64>
+ %eb = zext <4 x i32> %vb to <4 x i64>
+ %add = add <4 x i64> %ea, %eb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <4 x i64> %shr to <4 x i32>
+ store <4 x i32> %r, ptr %res
+ ret void
+}
+
+define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vavgr_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vor.v $vr2, $vr0, $vr1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
+; CHECK-NEXT: vsub.d $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <2 x i64>, ptr %a
+ %vb = load <2 x i64>, ptr %b
+ %ea = zext <2 x i64> %va to <2 x i128>
+ %eb = zext <2 x i64> %vb to <2 x i128>
+ %add = add <2 x i128> %ea, %eb
+ %add1 = add <2 x i128> %add, <i128 1, i128 1>
+ %shr = lshr <2 x i128> %add1, <i128 1, i128 1>
+ %r = trunc <2 x i128> %shr to <2 x i64>
+ store <2 x i64> %r, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
index e5e75ec..87cc7c6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
@@ -1,98 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
declare <4 x float> @llvm.log2.v4f32(<4 x float>)
declare <2 x double> @llvm.log2.v2f64(<2 x double>)
define void @flog2_v4f32(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v4f32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -48
-; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 0
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 2
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.w $vr0, $vr0, 3
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: vst $vr1, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 48
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v4f32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -48
-; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 2
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 3
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: vst $vr1, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 48
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vflogb.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <4 x float>, ptr %a
%r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v)
@@ -101,59 +20,12 @@ entry:
}
define void @flog2_v2f64(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v2f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -48
-; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: vreplvei.d $vr0, $vr0, 1
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; LA32-NEXT: vreplvei.d $vr0, $vr0, 0
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: vst $vr0, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 48
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v2f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -48
-; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: vst $vr0, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 48
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vflogb.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <2 x double>, ptr %a
%r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll
new file mode 100644
index 0000000..785d9fc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vse.ll
@@ -0,0 +1,1575 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare void @llvm.riscv.vse.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+define void @intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_allonesmask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ <vscale x 1 x i1> splat (i1 true),
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ iXLen);
+
+define void @intrinsic_vse_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vse_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ iXLen %2)
+
+ ret void
+}
+
+declare void @llvm.riscv.vse.mask.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ <vscale x 64 x i1>,
+ iXLen);
+
+define void @intrinsic_vse_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vse_mask_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vse.mask.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ <vscale x 64 x i1> %2,
+ iXLen %3)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll
new file mode 100644
index 0000000..5237536
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsm.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -global-isel -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -global-isel -verify-machineinstrs | FileCheck %s
+
+declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv1i1(<vscale x 1 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv1i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv2i1(<vscale x 2 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv2i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv4i1(<vscale x 4 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv4i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv8i1(<vscale x 8 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv8i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv16i1(<vscale x 16 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv16i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv32i1(<vscale x 32 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv32i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1>, ptr, iXLen);
+
+define void @intrinsic_vsm_v_nxv64i1(<vscale x 64 x i1> %0, ptr %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vsm_v_nxv64i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsm.v v0, (a0)
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1> %0, ptr %1, iXLen %2)
+ ret void
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i16>,
+ iXLen);
+
+; Make sure we can use the vsetvli from the producing instruction.
+define void @test_vsetvli_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, ptr %2, iXLen %3) nounwind {
+; CHECK-LABEL: test_vsetvli_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vsm.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
+ <vscale x 1 x i16> %0,
+ <vscale x 1 x i16> %1,
+ iXLen %3)
+ call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, ptr %2, iXLen %3)
+ ret void
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i32>,
+ iXLen);
+
+define void @test_vsetvli_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, ptr %2, iXLen %3) nounwind {
+; CHECK-LABEL: test_vsetvli_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vsm.v v8, (a0)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
+ <vscale x 1 x i32> %0,
+ <vscale x 1 x i32> %1,
+ iXLen %3)
+ call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, ptr %2, iXLen %3)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll
new file mode 100644
index 0000000..b7609ff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vsse.ll
@@ -0,0 +1,1724 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN: -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare void @llvm.riscv.vsse.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i64(
+ <vscale x 1 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+define void @intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_allonesmask_v_nxv1i64_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i64(
+ <vscale x 1 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> splat (i1 true),
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i64(
+ <vscale x 2 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i64_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i64(
+ <vscale x 2 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i64(
+ <vscale x 4 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i64_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i64(
+ <vscale x 4 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i64(
+ <vscale x 8 x i64>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i64_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i64(
+ <vscale x 8 x i64> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1f64(
+ <vscale x 1 x double>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1f64(
+ <vscale x 1 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2f64(
+ <vscale x 2 x double>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2f64(
+ <vscale x 2 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4f64(
+ <vscale x 4 x double>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4f64(
+ <vscale x 4 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8f64(
+ <vscale x 8 x double>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8f64(
+ <vscale x 8 x double> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i32(
+ <vscale x 1 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i32_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i32(
+ <vscale x 1 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i32(
+ <vscale x 2 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i32_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i32(
+ <vscale x 2 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i32(
+ <vscale x 4 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i32_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i32(
+ <vscale x 4 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i32(
+ <vscale x 8 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i32_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i32(
+ <vscale x 8 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16i32(
+ <vscale x 16 x i32>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i32_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16i32(
+ <vscale x 16 x i32> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1f32(
+ <vscale x 1 x float>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2f32(
+ <vscale x 2 x float>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4f32(
+ <vscale x 4 x float>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8f32(
+ <vscale x 8 x float>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16f32(
+ <vscale x 16 x float>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i16(
+ <vscale x 1 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i16(
+ <vscale x 1 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i16(
+ <vscale x 2 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i16(
+ <vscale x 2 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i16(
+ <vscale x 4 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i16(
+ <vscale x 4 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i16(
+ <vscale x 8 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i16(
+ <vscale x 8 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16i16(
+ <vscale x 16 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16i16(
+ <vscale x 16 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv32i16(
+ <vscale x 32 x i16>,
+ ptr,
+ iXLen,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv32i16(
+ <vscale x 32 x i16> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1f16(
+ <vscale x 1 x half>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2f16(
+ <vscale x 2 x half>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4f16(
+ <vscale x 4 x half>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8f16(
+ <vscale x 8 x half>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16f16(
+ <vscale x 16 x half>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv32f16(
+ <vscale x 32 x half>,
+ ptr,
+ iXLen,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv1i8(
+ <vscale x 1 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv1i8_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv1i8(
+ <vscale x 1 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv2i8(
+ <vscale x 2 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv2i8_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv2i8(
+ <vscale x 2 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv4i8(
+ <vscale x 4 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv4i8(
+ <vscale x 4 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv8i8(
+ <vscale x 8 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv8i8(
+ <vscale x 8 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv16i8(
+ <vscale x 16 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv16i8(
+ <vscale x 16 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv32i8(
+ <vscale x 32 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv32i8(
+ <vscale x 32 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ iXLen,
+ iXLen);
+
+define void @intrinsic_vsse_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vsse_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ iXLen %3)
+
+ ret void
+}
+
+declare void @llvm.riscv.vsse.mask.nxv64i8(
+ <vscale x 64 x i8>,
+ ptr,
+ iXLen,
+ <vscale x 64 x i1>,
+ iXLen);
+
+define void @intrinsic_vsse_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vsse_mask_v_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.riscv.vsse.mask.nxv64i8(
+ <vscale x 64 x i8> %0,
+ ptr %1,
+ iXLen %2,
+ <vscale x 64 x i1> %3,
+ iXLen %4)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 37e11db..988d049 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -136,6 +136,7 @@
; CHECK-NEXT: shgatpa - 'Shgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare).
; CHECK-NEXT: shifted-zextw-fusion - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension.
; CHECK-NEXT: shlcofideleg - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode).
+; CHECK-NEXT: short-forward-branch-i-minmax - Enable short forward branch optimization for min,max instructions in Zbb.
; CHECK-NEXT: short-forward-branch-opt - Enable short forward branch optimization.
; CHECK-NEXT: shtvala - 'Shtvala' (htval provides all needed values).
; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp).
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll
new file mode 100644
index 0000000..05e06cea
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-min-max.ll
@@ -0,0 +1,703 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb | FileCheck %s --check-prefixes=RV32I-ZBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=RV64I-ZBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB-ZBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-opt | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB-ZBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb,+short-forward-branch-i-minmax | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBIMinMax-ZBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+short-forward-branch-i-minmax | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBIMinMax-ZBB
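+
+; With +short-forward-branch-i-minmax, the Zbb min/max instructions themselves
+; are expected to be predicated inside the short forward branch, whereas with
+; plain +short-forward-branch-opt the min/max is computed unconditionally and
+; only the register move is predicated.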
+
+define i32 @select_example_smax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_smax:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB0_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: max a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB0_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smax:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB0_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: max a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB0_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smax:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: max a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB0_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smax:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: max a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB0_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB0_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB0_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB0_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB0_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.smax.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i32 @select_example_smin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_smin:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB1_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: min a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB1_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smin:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB1_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: min a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB1_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smin:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: min a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB1_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smin:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: min a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB1_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB1_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB1_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB1_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB1_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.smin.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i32 @select_example_umax(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_umax:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB2_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: maxu a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB2_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umax:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB2_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB2_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umax:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: maxu a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB2_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umax:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: maxu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB2_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB2_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB2_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB2_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB2_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.umax.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i32 @select_example_umin(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
+; RV32I-ZBB-LABEL: select_example_umin:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beqz a2, .LBB3_2
+; RV32I-ZBB-NEXT: # %bb.1:
+; RV32I-ZBB-NEXT: minu a1, a0, a3
+; RV32I-ZBB-NEXT: .LBB3_2: # %entry
+; RV32I-ZBB-NEXT: mv a0, a1
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umin:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB3_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: sext.w a3, a3
+; RV64I-ZBB-NEXT: sext.w a0, a0
+; RV64I-ZBB-NEXT: minu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB3_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umin:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: minu a0, a0, a3
+; RV32I-SFB-ZBB-NEXT: bnez a2, .LBB3_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a1
+; RV32I-SFB-ZBB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umin:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFB-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFB-ZBB-NEXT: minu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB3_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB3_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB3_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a3, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: sext.w a0, a0
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB3_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB3_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i32 @llvm.umin.i32(i32 %a, i32 %y)
+ %sel = select i1 %x, i32 %res, i32 %b
+ ret i32 %sel
+}
+
+define i64 @select_example_smax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_smax_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB4_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: slt a7, a6, a1
+; RV32I-ZBB-NEXT: beqz a7, .LBB4_3
+; RV32I-ZBB-NEXT: j .LBB4_4
+; RV32I-ZBB-NEXT: .LBB4_2:
+; RV32I-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-ZBB-NEXT: bnez a7, .LBB4_4
+; RV32I-ZBB-NEXT: .LBB4_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB4_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB4_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB4_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smax_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB4_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: max a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB4_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smax_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFB-ZBB-NEXT: slt t0, a6, a1
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB4_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB4_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB4_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB4_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB4_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB4_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB4_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB4_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB4_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smax_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: max a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB4_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smax_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFBIMinMax-ZBB-NEXT: slt t0, a6, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB4_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB4_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB4_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB4_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB4_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB4_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smax_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB4_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: max a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB4_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.smax.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
+
+define i64 @select_example_smin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_smin_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB5_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: slt a7, a1, a6
+; RV32I-ZBB-NEXT: beqz a7, .LBB5_3
+; RV32I-ZBB-NEXT: j .LBB5_4
+; RV32I-ZBB-NEXT: .LBB5_2:
+; RV32I-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-ZBB-NEXT: bnez a7, .LBB5_4
+; RV32I-ZBB-NEXT: .LBB5_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB5_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB5_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB5_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_smin_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB5_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: min a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB5_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_smin_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFB-ZBB-NEXT: slt t0, a1, a6
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB5_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB5_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB5_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB5_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB5_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB5_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB5_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB5_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB5_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_smin_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: min a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB5_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_smin_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: slt t0, a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB5_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB5_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB5_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB5_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB5_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB5_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_smin_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB5_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: min a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB5_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.smin.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
+
+define i64 @select_example_umax_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_umax_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB6_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: sltu a7, a6, a1
+; RV32I-ZBB-NEXT: beqz a7, .LBB6_3
+; RV32I-ZBB-NEXT: j .LBB6_4
+; RV32I-ZBB-NEXT: .LBB6_2:
+; RV32I-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-ZBB-NEXT: bnez a7, .LBB6_4
+; RV32I-ZBB-NEXT: .LBB6_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB6_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB6_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB6_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umax_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB6_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB6_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umax_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFB-ZBB-NEXT: sltu t0, a6, a1
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB6_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB6_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB6_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB6_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB6_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB6_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB6_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB6_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB6_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umax_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: maxu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB6_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umax_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a5, a0
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu t0, a6, a1
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB6_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB6_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB6_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB6_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB6_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB6_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umax_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB6_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: maxu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB6_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.umax.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
+
+define i64 @select_example_umin_1(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
+; RV32I-ZBB-LABEL: select_example_umin_1:
+; RV32I-ZBB: # %bb.0: # %entry
+; RV32I-ZBB-NEXT: beq a1, a6, .LBB7_2
+; RV32I-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-ZBB-NEXT: sltu a7, a1, a6
+; RV32I-ZBB-NEXT: beqz a7, .LBB7_3
+; RV32I-ZBB-NEXT: j .LBB7_4
+; RV32I-ZBB-NEXT: .LBB7_2:
+; RV32I-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-ZBB-NEXT: bnez a7, .LBB7_4
+; RV32I-ZBB-NEXT: .LBB7_3: # %entry
+; RV32I-ZBB-NEXT: mv a1, a6
+; RV32I-ZBB-NEXT: mv a0, a5
+; RV32I-ZBB-NEXT: .LBB7_4: # %entry
+; RV32I-ZBB-NEXT: beqz a4, .LBB7_6
+; RV32I-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-ZBB-NEXT: ret
+; RV32I-ZBB-NEXT: .LBB7_6: # %entry
+; RV32I-ZBB-NEXT: mv a0, a2
+; RV32I-ZBB-NEXT: mv a1, a3
+; RV32I-ZBB-NEXT: ret
+;
+; RV64I-ZBB-LABEL: select_example_umin_1:
+; RV64I-ZBB: # %bb.0: # %entry
+; RV64I-ZBB-NEXT: beqz a2, .LBB7_2
+; RV64I-ZBB-NEXT: # %bb.1:
+; RV64I-ZBB-NEXT: minu a1, a0, a3
+; RV64I-ZBB-NEXT: .LBB7_2: # %entry
+; RV64I-ZBB-NEXT: mv a0, a1
+; RV64I-ZBB-NEXT: ret
+;
+; RV32I-SFB-ZBB-LABEL: select_example_umin_1:
+; RV32I-SFB-ZBB: # %bb.0: # %entry
+; RV32I-SFB-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFB-ZBB-NEXT: sltu t0, a1, a6
+; RV32I-SFB-ZBB-NEXT: bne a1, a6, .LBB7_2
+; RV32I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-ZBB-NEXT: mv t0, a7
+; RV32I-SFB-ZBB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB7_4
+; RV32I-SFB-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a6
+; RV32I-SFB-ZBB-NEXT: .LBB7_4: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez t0, .LBB7_6
+; RV32I-SFB-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a5
+; RV32I-SFB-ZBB-NEXT: .LBB7_6: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB7_8
+; RV32I-SFB-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a0, a2
+; RV32I-SFB-ZBB-NEXT: .LBB7_8: # %entry
+; RV32I-SFB-ZBB-NEXT: bnez a4, .LBB7_10
+; RV32I-SFB-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFB-ZBB-NEXT: mv a1, a3
+; RV32I-SFB-ZBB-NEXT: .LBB7_10: # %entry
+; RV32I-SFB-ZBB-NEXT: ret
+;
+; RV64I-SFB-ZBB-LABEL: select_example_umin_1:
+; RV64I-SFB-ZBB: # %bb.0: # %entry
+; RV64I-SFB-ZBB-NEXT: minu a0, a0, a3
+; RV64I-SFB-ZBB-NEXT: bnez a2, .LBB7_2
+; RV64I-SFB-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-ZBB-NEXT: mv a0, a1
+; RV64I-SFB-ZBB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-ZBB-NEXT: ret
+;
+; RV32I-SFBIMinMax-ZBB-LABEL: select_example_umin_1:
+; RV32I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu a7, a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: sltu t0, a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: bne a1, a6, .LBB7_2
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv t0, a7
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_2: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB7_4
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.3: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a6
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_4: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez t0, .LBB7_6
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.5: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a5
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_6: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB7_8
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.7: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a0, a2
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_8: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: bnez a4, .LBB7_10
+; RV32I-SFBIMinMax-ZBB-NEXT: # %bb.9: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: mv a1, a3
+; RV32I-SFBIMinMax-ZBB-NEXT: .LBB7_10: # %entry
+; RV32I-SFBIMinMax-ZBB-NEXT: ret
+;
+; RV64I-SFBIMinMax-ZBB-LABEL: select_example_umin_1:
+; RV64I-SFBIMinMax-ZBB: # %bb.0: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: beqz a2, .LBB7_2
+; RV64I-SFBIMinMax-ZBB-NEXT: # %bb.1: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: minu a1, a0, a3
+; RV64I-SFBIMinMax-ZBB-NEXT: .LBB7_2: # %entry
+; RV64I-SFBIMinMax-ZBB-NEXT: mv a0, a1
+; RV64I-SFBIMinMax-ZBB-NEXT: ret
+entry:
+ %res = call i64 @llvm.umin.i64(i64 %a, i64 %y)
+ %sel = select i1 %x, i64 %res, i64 %b
+ ret i64 %sel
+}
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll
new file mode 100644
index 0000000..293b48d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll
@@ -0,0 +1,39 @@
+;; BB section test with basic block hashes.
+
+;; Basic block sections profile with basic block hashes.
+; RUN: echo 'v1' > %t
+; RUN: echo 'f foo' >> %t
+; RUN: echo 'g 0:10,1:9,2:1 1:8,3:8 2:2,3:2 3:11' >> %t
+; RUN: echo 'c 0 2 3' >> %t
+; RUN: echo 'h 0:64863A11B5CA0000 1:54F1E80D6B270006 2:54F1F4E66B270008 3:C8BC6041A2CB0009' >> %t
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck %s
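+;;
+;; Each entry on the 'h' line pairs a basic block ID with that block's 64-bit
+;; hash written as hex (e.g. '0:64863A11B5CA0000'); malformed entries are
+;; rejected (see basic-block-sections-clusters-error.ll).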
+;
+define void @foo(i1 zeroext) nounwind {
+ %2 = alloca i8, align 1
+ %3 = zext i1 %0 to i8
+ store i8 %3, ptr %2, align 1
+ %4 = load i8, ptr %2, align 1
+ %5 = trunc i8 %4 to i1
+ br i1 %5, label %6, label %8
+
+6: ; preds = %1
+ %7 = call i32 @bar()
+ br label %10
+
+8: ; preds = %1
+ %9 = call i32 @baz()
+ br label %10
+
+10: ; preds = %8, %6
+ ret void
+}
+
+declare i32 @bar() #1
+
+declare i32 @baz() #1
+
+; CHECK: .section .text.foo,"ax",@progbits
+; CHECK: callq baz
+; CHECK: retq
+; CHECK: .section .text.split.foo,"ax",@progbits
+; CHECK: callq bar
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll
index 751ab76..eb0a14b 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll
@@ -69,6 +69,20 @@
; RUN: echo 'g 0:4,1:2:3' >> %t15
; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t15 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR15
; CHECK-ERROR15: LLVM ERROR: invalid profile {{.*}} at line 4: unsigned integer expected: '2:3'
+; RUN: echo 'v1' > %t16
+; RUN: echo 'f dummy1' >> %t16
+; RUN: echo 'c 0 1' >> %t16
+; RUN: echo 'g 0:4,1:2' >> %t16
+; RUN: echo 'h a:1111111111111111 1:ffffffffffffffff' >> %t16
+; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t16 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR16
+; CHECK-ERROR16: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected: 'a'
+; RUN: echo 'v1' > %t17
+; RUN: echo 'f dummy1' >> %t17
+; RUN: echo 'c 0 1' >> %t17
+; RUN: echo 'g 0:4,1:2' >> %t17
+; RUN: echo 'h 0:111111111111111g 1:ffffffffffffffff' >> %t17
+; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t17 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR17
+; CHECK-ERROR17: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected in hex format: '111111111111111g'
define i32 @dummy1(i32 %x, i32 %y, i32 %z) {
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index cc3dcf3..06e7d47 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
%cmp = icmp ne i4096 %test, 0
ret i1 %cmp
}
+
+; Special Cases
+
+; Multiple uses of the stored value
+define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_cmpz_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %esi
+; X86-NEXT: movl 36(%esp,%esi), %eax
+; X86-NEXT: movl 40(%esp,%esi), %edi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl 32(%esp,%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esp,%esi), %esi
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: xorl 12(%ecx), %esi
+; X86-NEXT: xorl 8(%ecx), %edx
+; X86-NEXT: xorl 4(%ecx), %eax
+; X86-NEXT: xorl (%ecx), %edi
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl %eax, 4(%ecx)
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: complement_cmpz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %eax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: cmovneq %rsi, %rax
+; SSE-NEXT: xorq 8(%rdi), %rdx
+; SSE-NEXT: xorq (%rdi), %rax
+; SSE-NEXT: movq %rax, (%rdi)
+; SSE-NEXT: movq %rdx, 8(%rdi)
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: complement_cmpz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: movl $1, %eax
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: shldq %cl, %rax, %rdx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shlxq %rcx, %rax, %rax
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rax, %rdx
+; AVX2-NEXT: cmovneq %rsi, %rax
+; AVX2-NEXT: xorq 8(%rdi), %rdx
+; AVX2-NEXT: xorq (%rdi), %rax
+; AVX2-NEXT: movq %rax, (%rdi)
+; AVX2-NEXT: movq %rdx, 8(%rdi)
+; AVX2-NEXT: orq %rdx, %rax
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: complement_cmpz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: movl $1, %edx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %rdx, %rsi
+; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rdx, %rsi
+; AVX512-NEXT: cmovneq %rax, %rdx
+; AVX512-NEXT: xorq 8(%rdi), %rsi
+; AVX512-NEXT: xorq (%rdi), %rdx
+; AVX512-NEXT: movq %rdx, (%rdi)
+; AVX512-NEXT: movq %rsi, 8(%rdi)
+; AVX512-NEXT: orq %rsi, %rdx
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %ld = load i128, ptr %word
+ %res = xor i128 %ld, %bit
+ store i128 %res, ptr %word
+ %cmp = icmp ne i128 %res, 0
+ ret i1 %cmp
+}
+
+; Multiple loads in store chain
+define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
+; X86-LABEL: reset_multiload_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %edi
+; X86-NEXT: movl 36(%esp,%edi), %edx
+; X86-NEXT: movl 40(%esp,%edi), %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl 32(%esp,%edi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esp,%edi), %edi
+; X86-NEXT: shldl %cl, %ebx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: notl %ebx
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: andl $96, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx,%eax), %eax
+; X86-NEXT: andl %ebx, (%ecx)
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: notl %edx
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: andl %edx, 4(%ebx)
+; X86-NEXT: notl %esi
+; X86-NEXT: andl %esi, 8(%ebx)
+; X86-NEXT: notl %edi
+; X86-NEXT: andl %edi, 12(%ebx)
+; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: jae .LBB22_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB22_2:
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: reset_multiload_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %esi
+; SSE-NEXT: xorl %r8d, %r8d
+; SSE-NEXT: shldq %cl, %rsi, %r8
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: shlq %cl, %rsi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rsi, %r8
+; SSE-NEXT: cmovneq %rax, %rsi
+; SSE-NEXT: notq %r8
+; SSE-NEXT: notq %rsi
+; SSE-NEXT: movl %ecx, %r9d
+; SSE-NEXT: andl $96, %r9d
+; SSE-NEXT: shrl $3, %r9d
+; SSE-NEXT: movl (%rdi,%r9), %r9d
+; SSE-NEXT: btl %ecx, %r9d
+; SSE-NEXT: jb .LBB22_2
+; SSE-NEXT: # %bb.1:
+; SSE-NEXT: movl (%rdx), %eax
+; SSE-NEXT: .LBB22_2:
+; SSE-NEXT: andq %r8, 8(%rdi)
+; SSE-NEXT: andq %rsi, (%rdi)
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: reset_multiload_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: movl $1, %r8d
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shldq %cl, %r8, %rsi
+; AVX2-NEXT: shlxq %rcx, %r8, %r8
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %r8, %rsi
+; AVX2-NEXT: cmovneq %rax, %r8
+; AVX2-NEXT: notq %rsi
+; AVX2-NEXT: notq %r8
+; AVX2-NEXT: movl %ecx, %r9d
+; AVX2-NEXT: andl $96, %r9d
+; AVX2-NEXT: shrl $3, %r9d
+; AVX2-NEXT: movl (%rdi,%r9), %r9d
+; AVX2-NEXT: btl %ecx, %r9d
+; AVX2-NEXT: jb .LBB22_2
+; AVX2-NEXT: # %bb.1:
+; AVX2-NEXT: movl (%rdx), %eax
+; AVX2-NEXT: .LBB22_2:
+; AVX2-NEXT: andq %rsi, 8(%rdi)
+; AVX2-NEXT: andq %r8, (%rdi)
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: reset_multiload_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: movl $1, %r8d
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %r8, %rsi
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: shlxq %rcx, %r8, %r8
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %r8, %rsi
+; AVX512-NEXT: cmovneq %rax, %r8
+; AVX512-NEXT: notq %rsi
+; AVX512-NEXT: notq %r8
+; AVX512-NEXT: movl %ecx, %r9d
+; AVX512-NEXT: andl $96, %r9d
+; AVX512-NEXT: shrl $3, %r9d
+; AVX512-NEXT: movl (%rdi,%r9), %r9d
+; AVX512-NEXT: btl %ecx, %r9d
+; AVX512-NEXT: jb .LBB22_2
+; AVX512-NEXT: # %bb.1:
+; AVX512-NEXT: movl (%rdx), %eax
+; AVX512-NEXT: .LBB22_2:
+; AVX512-NEXT: andq %rsi, 8(%rdi)
+; AVX512-NEXT: andq %r8, (%rdi)
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %mask = xor i128 %bit, -1
+ %ld = load i128, ptr %word
+ %sel = load i32, ptr %p
+ %test = and i128 %ld, %bit
+ %res = and i128 %ld, %mask
+ %cmp = icmp eq i128 %test, 0
+ store i128 %res, ptr %word
+ %ret = select i1 %cmp, i32 %sel, i32 0
+ ret i32 %ret
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index 7f34513..68cfc65 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -660,6 +660,114 @@ exit:
ret i32 %red
}
+
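+; OR reduction over i8 values loaded with stride 2: the strided loads are
+; replicated (scalarized) and the loop is still vectorized with VF 16.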
+define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
+; CHECK-LABEL: define i32 @test_or_reduction_with_stride_2(
+; CHECK-SAME: i32 [[SCALE:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[SCALE]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1
+; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1
+; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1
+; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1
+; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP31]], align 1
+; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1
+; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2
+; CHECK-NEXT: [[TMP51:%.*]] = insertelement <16 x i8> [[TMP50]], i8 [[TMP35]], i32 3
+; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP36]], i32 4
+; CHECK-NEXT: [[TMP53:%.*]] = insertelement <16 x i8> [[TMP52]], i8 [[TMP37]], i32 5
+; CHECK-NEXT: [[TMP54:%.*]] = insertelement <16 x i8> [[TMP53]], i8 [[TMP38]], i32 6
+; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP54]], i8 [[TMP39]], i32 7
+; CHECK-NEXT: [[TMP56:%.*]] = insertelement <16 x i8> [[TMP55]], i8 [[TMP40]], i32 8
+; CHECK-NEXT: [[TMP57:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP41]], i32 9
+; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x i8> [[TMP57]], i8 [[TMP42]], i32 10
+; CHECK-NEXT: [[TMP59:%.*]] = insertelement <16 x i8> [[TMP58]], i8 [[TMP43]], i32 11
+; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP59]], i8 [[TMP44]], i32 12
+; CHECK-NEXT: [[TMP61:%.*]] = insertelement <16 x i8> [[TMP60]], i8 [[TMP45]], i32 13
+; CHECK-NEXT: [[TMP62:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP46]], i32 14
+; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i8> [[TMP62]], i8 [[TMP47]], i32 15
+; CHECK-NEXT: [[TMP64:%.*]] = sext <16 x i8> [[TMP63]] to <16 x i32>
+; CHECK-NEXT: [[TMP65:%.*]] = mul <16 x i32> [[BROADCAST_SPLAT]], [[TMP64]]
+; CHECK-NEXT: [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48
+; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]])
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %reduction = phi i32 [ %reduction.next, %loop ], [ 0, %entry ]
+ %gep = getelementptr [32 x i8], ptr %src, i64 %iv
+ %load = load i8, ptr %gep, align 1
+ %sext = sext i8 %load to i32
+ %mul = mul i32 %scale, %sext
+ %reduction.next = or i32 %mul, %reduction
+ %iv.next = add i64 %iv, 2
+ %cmp = icmp eq i64 %iv.next, 100
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret i32 %reduction.next
+}
+
attributes #0 = { "target-cpu"="neoverse-512tvb" }
!0 = !{!1, !2, i64 0}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index d4004da..8081c0e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -64,39 +64,24 @@ exit:
define void @uniform_load_can_fold_users(ptr noalias %src, ptr %dst, i64 %start, double %d) {
; CHECK-LABEL: define void @uniform_load_can_fold_users(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[START:%.*]], double [[D:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1
-; CHECK-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 0)
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]]
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[SRC]], align 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], splat (double 9.000000e+00)
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = fmul double [[TMP5]], 9.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = fdiv double [[TMP7]], [[D]]
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP4]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP12]], i64 [[TMP10]]
-; CHECK-NEXT: store double [[TMP8]], ptr [[TMP13]], align 8
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP14]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[IV_1_NEXT]] = add i64 [[TMP4]], 1
+; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1
+; CHECK-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 0
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
index 9a69982..70adac2 100644
--- a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
@@ -84,12 +84,8 @@ define void @single_scalar_cast_stored(ptr %src, ptr %dst, i32 %n) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2, !alias.scope [[META4:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i16> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[BROADCAST_SPLAT]], splat (i16 15)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[TMP0]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = and i16 [[TMP0]], 15
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP4]]
; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll
new file mode 100644
index 0000000..4a539d7
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes="simplifycfg<switch-range-to-icmp>" < %s | FileCheck %s
+
+; Make sure there's no use-after-free when removing incoming values from PHI nodes.
+
+define i32 @pr165301(i1 %cond) {
+; CHECK-LABEL: define i32 @pr165301(
+; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[SWITCHBB:.*]]
+; CHECK: [[SWITCHBB]]:
+; CHECK-NEXT: br label %[[SWITCHBB]]
+;
+entry:
+ br label %switchbb
+
+switchbb:
+ switch i1 %cond, label %default [
+ i1 false, label %switchbb
+ i1 true, label %switchbb
+ ]
+
+default:
+ %phi.lcssa = phi i32 [ 0, %switchbb ]
+ ret i32 %phi.lcssa
+}
diff --git a/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s b/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s
new file mode 100644
index 0000000..6c38791
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/AArch64/DW_AT_APPLE_property.s
@@ -0,0 +1,126 @@
+# Checks that we correctly display the DW_AT_APPLE_property_name of a
+# referenced DW_TAG_APPLE_property.
+#
+# RUN: llvm-mc -triple=aarch64--darwin -filetype=obj -o %t.o < %s
+# RUN: not llvm-dwarfdump %t.o 2> %t.errs.txt | FileCheck %s
+# RUN: FileCheck %s --check-prefix=ERRORS < %t.errs.txt
+
+# CHECK: 0x[[PROP_REF:[0-9a-f]+]]: DW_TAG_APPLE_property
+# CHECK-NEXT: DW_AT_APPLE_property_name ("autoSynthProp")
+#
+# CHECK: 0x[[NO_NAME_PROP:[0-9a-f]+]]: DW_TAG_APPLE_property
+# CHECK-NOT: DW_AT_APPLE_property_name
+#
+# CHECK: 0x[[INVALID_STRP:[0-9a-f]+]]: DW_TAG_APPLE_property
+# CHECK-NEXT: DW_AT_APPLE_property_name
+#
+# CHECK: DW_TAG_member
+# CHECK: DW_AT_APPLE_property (0x[[PROP_REF]] "autoSynthProp")
+# CHECK: DW_AT_APPLE_property (0x[[NO_NAME_PROP]] "")
+# CHECK: DW_AT_APPLE_property (0x{{.*}})
+# CHECK: DW_AT_APPLE_property (0x{{.*}})
+# CHECK: DW_AT_APPLE_property (0x[[INVALID_STRP]])
+
+# ERRORS: error: decoding DW_AT_APPLE_property_name: not referencing a DW_TAG_APPLE_property
+# ERRORS: error: decoding DW_AT_APPLE_property_name: invalid DIE
+# ERRORS: error: decoding DW_AT_APPLE_property_name: DW_FORM_strp offset 102 is beyond .debug_str bounds
+
+ .section __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+ .byte 1 ; Abbreviation Code
+ .byte 17 ; DW_TAG_compile_unit
+ .byte 1 ; DW_CHILDREN_yes
+ .byte 114 ; DW_AT_str_offsets_base
+ .byte 23 ; DW_FORM_sec_offset
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 2 ; Abbreviation Code
+ .byte 19 ; DW_TAG_structure_type
+ .byte 1 ; DW_CHILDREN_yes
+ .byte 3 ; DW_AT_name
+ .byte 37 ; DW_FORM_strx1
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 3 ; Abbreviation Code
+ .ascii "\200\204\001" ; DW_TAG_APPLE_property
+ .byte 0 ; DW_CHILDREN_no
+ .ascii "\350\177" ; DW_AT_APPLE_property_name
+ .byte 37 ; DW_FORM_strx1
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 4 ; Abbreviation Code
+ .ascii "\200\204\001" ; DW_TAG_APPLE_property
+ .byte 0 ; DW_CHILDREN_no
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 5 ; Abbreviation Code
+ .ascii "\200\204\001" ; DW_TAG_APPLE_property
+ .byte 0 ; DW_CHILDREN_no
+ .ascii "\350\177" ; DW_AT_APPLE_property_name
+ .byte 14 ; DW_FORM_strp
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 6 ; Abbreviation Code
+ .byte 13 ; DW_TAG_member
+ .byte 0 ; DW_CHILDREN_no
+ .byte 3 ; DW_AT_name
+ .byte 37 ; DW_FORM_strx1
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .ascii "\355\177" ; DW_AT_APPLE_property
+ .byte 19 ; DW_FORM_ref4
+ .byte 0 ; EOM(1)
+ .byte 0 ; EOM(2)
+ .byte 0 ; EOM(3)
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+Lcu_begin0:
+Lset0 = Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit
+ .long Lset0
+Ldebug_info_start0:
+ .short 5 ; DWARF version number
+ .byte 1 ; DWARF Unit Type
+ .byte 8 ; Address Size (in bytes)
+Lset1 = Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. Section
+ .long Lset1
+ .byte 1 ; Abbrev [1] DW_TAG_compile_unit
+Lset2 = Lstr_offsets_base0-Lsection_str_off ; DW_AT_str_offsets_base
+ .long Lset2
+ .byte 2 ; Abbrev [2] DW_TAG_structure_type
+ .byte 2 ; DW_AT_name
+ .byte 3 ; Abbrev [3] DW_TAG_APPLE_property
+ .byte 0 ; DW_AT_APPLE_property_name
+ .byte 4 ; Abbrev [4] DW_TAG_APPLE_property
+ .byte 5 ; Abbrev [5] DW_TAG_APPLE_property
+ .long 102 ; DW_AT_APPLE_property_name
+ .byte 6 ; Abbrev [6] DW_TAG_member
+ .byte 1 ; DW_AT_name
+ .long 19 ; DW_AT_APPLE_property
+ .long 21 ; DW_AT_APPLE_property
+ .long 17 ; DW_AT_APPLE_property
+ .long 0 ; DW_AT_APPLE_property
+ .long 22 ; DW_AT_APPLE_property
+ .byte 0 ; End Of Children Mark
+ .byte 0 ; End Of Children Mark
+Ldebug_info_end0:
+ .section __DWARF,__debug_str_offs,regular,debug
+Lsection_str_off:
+ .long 16 ; Length of String Offsets Set
+ .short 5
+ .short 0
+Lstr_offsets_base0:
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "autoSynthProp" ; string offset=0
+ .asciz "_var" ; string offset=14
+ .asciz "Foo" ; string offset=19
+ .section __DWARF,__debug_str_offs,regular,debug
+ .long 0
+ .long 14
+ .long 19
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test
new file mode 100644
index 0000000..24726c34
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test
@@ -0,0 +1,37 @@
+## This test checks that we output a warning when the specified version is too old to support the given features.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-readobj --bb-addr-map %t 2>&1 | FileCheck -DFILE=%t %s
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+
+# CHECK: BBAddrMap [
+# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 1: version should be >= 3 for SHT_LLVM_BB_ADDR_MAP when callsite offsets feature is enabled: version = 2 feature = 32
+Sections:
+ - Name: '.llvm_bb_addr_map (1)'
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 2
+ Feature: 0x20
+
+# CHECK: BBAddrMap [
+# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 2: version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when basic block hash feature is enabled: version = 3 feature = 64
+
+ - Name: '.llvm_bb_addr_map (2)'
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 3
+ Feature: 0x40
+
+# CHECK: BBAddrMap [
+# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when post link cfg feature is enabled: version = 4 feature = 128
+
+ - Name: '.llvm_bb_addr_map (3)'
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 4
+ Feature: 0x80
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
index 5faafd4..8e9d227 100644
--- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
@@ -15,7 +15,7 @@
## Check that a malformed section can be handled.
# RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o
-# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED
+# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000015 -DFILE=%t2.o --check-prefix=TRUNCATED
## Check that missing features can be handled.
# RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o
@@ -59,17 +59,20 @@
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 100
# PRETTY-NEXT: Frequency: 1.0
+# CHECK-NEXT: PostLink Frequency: 10
# CHECK-NEXT: Successors [
# CHECK-NEXT: {
# CHECK-NEXT: ID: 2
# RAW-NEXT: Probability: 0x80000000
# PRETTY-NEXT: Probability: 0x80000000 / 0x80000000 = 100.00%
+# CHECK-NEXT: PostLink Probability: 7
# CHECK-NEXT: }
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 100
# PRETTY-NEXT: Frequency: 1.0
+# CHECK-NEXT: PostLink Frequency: 0
# CHECK-NEXT: Successors [
# CHECK-NEXT: ]
# CHECK-NEXT: }
@@ -172,8 +175,8 @@ Sections:
ShSize: [[SIZE=<none>]]
Link: .text
Entries:
- - Version: 2
- Feature: 0x7
+ - Version: 5
+ Feature: 0x87
BBRanges:
- BaseAddress: [[ADDR=0x11111]]
BBEntries:
@@ -197,10 +200,12 @@ Sections:
PGOAnalyses:
- FuncEntryCount: 100
PGOBBEntries:
- - BBFreq: 100
+ - BBFreq: 100
+ PostLinkBBFreq: 10
Successors:
- - ID: 2
- BrProb: 0x80000000
+ - ID: 2
+ BrProb: 0x80000000
+ PostLinkBrFreq: 7
- BBFreq: 100
Successors: []
- FuncEntryCount: 8888
diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
index 299bf46..645507a 100644
--- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
+++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
@@ -15,7 +15,7 @@
# VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP
# VALID-NEXT: Entries:
# VALID-NEXT: - Version: 2
-# VALID-NEXT: Feature: 0x7
+# VALID-NEXT: Feature: 0x87
## The 'BaseAddress' field is omitted when it's zero.
# VALID-NEXT: BBRanges:
# VALID-NEXT: - BBEntries:
@@ -43,17 +43,23 @@
# VALID-NEXT: PGOAnalyses:
# VALID-NEXT: - FuncEntryCount: 100
# VALID-NEXT: PGOBBEntries:
-# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: PostLinkBBFreq: 10
# VALID-NEXT: Successors:
-# VALID-NEXT: - ID: 2
-# VALID-NEXT: BrProb: 0x80000000
-# VALID-NEXT: - ID: 4
-# VALID-NEXT: BrProb: 0x80000000
-# VALID-NEXT: - BBFreq: 50
+# VALID-NEXT: - ID: 2
+# VALID-NEXT: BrProb: 0x80000000
+# VALID-NEXT: PostLinkBrFreq: 7
+# VALID-NEXT: - ID: 4
+# VALID-NEXT: BrProb: 0x80000000
+# VALID-NEXT: PostLinkBrFreq: 0
+# VALID-NEXT: - BBFreq: 50
+# VALID-NEXT: PostLinkBBFreq: 0
# VALID-NEXT: Successors:
-# VALID-NEXT: - ID: 4
-# VALID-NEXT: BrProb: 0xFFFFFFFF
-# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: - ID: 4
+# VALID-NEXT: BrProb: 0xFFFFFFFF
+# VALID-NEXT: PostLinkBrFreq: 0
+# VALID-NEXT: - BBFreq: 100
+# VALID-NEXT: PostLinkBBFreq: 3
# VALID-NEXT: Successors: []
# VALID-NEXT: PGOBBEntries:
# VALID-NEXT: - BBFreq: 20
@@ -69,7 +75,7 @@ Sections:
ShSize: [[SIZE=<none>]]
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x87
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -97,17 +103,20 @@ Sections:
PGOAnalyses:
- FuncEntryCount: 100
PGOBBEntries:
- - BBFreq: 100
+ - BBFreq: 100
+ PostLinkBBFreq: 10
Successors:
- - ID: 2
- BrProb: 0x80000000
- - ID: 4
- BrProb: 0x80000000
- - BBFreq: 50
+ - ID: 2
+ BrProb: 0x80000000
+ PostLinkBrFreq: 7
+ - ID: 4
+ BrProb: 0x80000000
+ - BBFreq: 50
Successors:
- - ID: 4
- BrProb: 0xFFFFFFFF
- - BBFreq: 100
+ - ID: 4
+ BrProb: 0xFFFFFFFF
+ - BBFreq: 100
+ PostLinkBBFreq: 3
Successors: []
- PGOBBEntries:
- BBFreq: 20
diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml
index a4cb572..ac9c8d4 100644
--- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml
@@ -6,8 +6,9 @@
# Case 4: Specify Entries.
# CHECK: Name: .llvm_bb_addr_map (1)
# CHECK: SectionData (
-# CHECK-NEXT: 0000: 02072000 00000000 0000010B 010203E8
-# CHECK-NEXT: 0010: 07E80702 0CEEDDBB F70E0D91 A2C48801
+# CHECK-NEXT: 0000: 02872000 00000000 0000010B 010203E8
+# CHECK-NEXT: 0010: 07E80764 020CEEDD BBF70E28 0D91A2C4
+# CHECK-NEXT: 0020: 880100
# CHECK-NEXT: )
# Case 7: Not including a field which is enabled in feature doesn't emit value
@@ -26,12 +27,12 @@ Sections:
## Test the following cases:
## 1) We can produce an .llvm_bb_addr_map section from a description with
-## Entries and PGO Analysis data.
+## Entries, PGO Analysis, and Post Link data.
- Name: '.llvm_bb_addr_map (1)'
Type: SHT_LLVM_BB_ADDR_MAP
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x87
BBRanges:
- BaseAddress: 0x0000000000000020
BBEntries:
@@ -42,12 +43,14 @@ Sections:
PGOAnalyses:
- FuncEntryCount: 1000
PGOBBEntries:
- - BBFreq: 1000
+ - BBFreq: 1000
+ PostLinkBBFreq: 100
Successors:
- - ID: 12
- BrProb: 0xeeeeeeee
- - ID: 13
- BrProb: 0x11111111
+ - ID: 12
+ BrProb: 0xeeeeeeee
+ PostLinkBrFreq: 40
+ - ID: 13
+ BrProb: 0x11111111
## 2) According to feature we have FuncEntryCount but none is provided in yaml
- Name: '.llvm_bb_addr_map (2)'
@@ -66,7 +69,7 @@ Sections:
## Check that yaml2obj generates a warning when we use unsupported feature.
# RUN: yaml2obj --docnum=2 %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE
-# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xf0
+# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0x100
--- !ELF
FileHeader:
@@ -79,4 +82,4 @@ Sections:
Entries:
- Version: 2
## Specify unsupported feature
- Feature: 0xF0
+ Feature: 0x100
diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
index 339e419..05d77d6 100644
--- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
@@ -220,7 +220,7 @@ Sections:
## Check that yaml2obj generates a warning when we use unsupported versions.
# RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION
-# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version
+# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 6; encoding using the most recent version
--- !ELF
FileHeader:
@@ -232,4 +232,4 @@ Sections:
Type: SHT_LLVM_BB_ADDR_MAP
Entries:
## Specify unsupported version
- - Version: 5
+ - Version: 6