Diffstat (limited to 'llvm/test/CodeGen/LoongArch/lasx')
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll                   | 160
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll                  | 308
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll           | 321
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll | 379
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll         | 258
5 files changed, 1182 insertions, 244 deletions
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll
new file mode 100644
index 0000000..48ec98c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @minnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: minnum_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 5
+; CHECK-NEXT: xvpickve.w $xr3, $xr1, 5
+; CHECK-NEXT: fmin.s $fa2, $fa3, $fa2
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 4
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 4
+; CHECK-NEXT: fmin.s $fa3, $fa4, $fa3
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 6
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 6
+; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 7
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 7
+; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 48
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 1
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 1
+; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2
+; CHECK-NEXT: xvpickve.w $xr4, $xr0, 0
+; CHECK-NEXT: xvpickve.w $xr5, $xr1, 0
+; CHECK-NEXT: fmin.s $fa4, $fa5, $fa4
+; CHECK-NEXT: vextrins.w $vr4, $vr2, 16
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 2
+; CHECK-NEXT: xvpickve.w $xr5, $xr1, 2
+; CHECK-NEXT: fmin.s $fa2, $fa5, $fa2
+; CHECK-NEXT: vextrins.w $vr4, $vr2, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: xvpickve.w $xr1, $xr1, 3
+; CHECK-NEXT: fmin.s $fa0, $fa1, $fa0
+; CHECK-NEXT: vextrins.w $vr4, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr4, $xr3, 2
+; CHECK-NEXT: xvst $xr4, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %x
+ %v1 = load <8 x float>, ptr %y
+ %r = call <8 x float> @llvm.minnum.v8f32(<8 x float> %v0, <8 x float> %v1)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+define void @minnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: minnum_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 3
+; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT: fmin.d $fa2, $fa3, $fa2
+; CHECK-NEXT: xvpickve.d $xr3, $xr0, 2
+; CHECK-NEXT: xvpickve.d $xr4, $xr1, 2
+; CHECK-NEXT: fmin.d $fa3, $fa4, $fa3
+; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1
+; CHECK-NEXT: xvpickve.d $xr4, $xr1, 1
+; CHECK-NEXT: fmin.d $fa2, $fa4, $fa2
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr1, 0
+; CHECK-NEXT: fmin.d $fa0, $fa1, $fa0
+; CHECK-NEXT: vextrins.d $vr0, $vr2, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %x
+ %v1 = load <4 x double>, ptr %y
+ %r = call <4 x double> @llvm.minnum.v4f64(<4 x double> %v0, <4 x double> %v1)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+define void @maxnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: maxnum_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 5
+; CHECK-NEXT: xvpickve.w $xr3, $xr1, 5
+; CHECK-NEXT: fmax.s $fa2, $fa3, $fa2
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 4
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 4
+; CHECK-NEXT: fmax.s $fa3, $fa4, $fa3
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 6
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 6
+; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 7
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 7
+; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 48
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 1
+; CHECK-NEXT: xvpickve.w $xr4, $xr1, 1
+; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2
+; CHECK-NEXT: xvpickve.w $xr4, $xr0, 0
+; CHECK-NEXT: xvpickve.w $xr5, $xr1, 0
+; CHECK-NEXT: fmax.s $fa4, $fa5, $fa4
+; CHECK-NEXT: vextrins.w $vr4, $vr2, 16
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 2
+; CHECK-NEXT: xvpickve.w $xr5, $xr1, 2
+; CHECK-NEXT: fmax.s $fa2, $fa5, $fa2
+; CHECK-NEXT: vextrins.w $vr4, $vr2, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: xvpickve.w $xr1, $xr1, 3
+; CHECK-NEXT: fmax.s $fa0, $fa1, $fa0
+; CHECK-NEXT: vextrins.w $vr4, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr4, $xr3, 2
+; CHECK-NEXT: xvst $xr4, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %x
+ %v1 = load <8 x float>, ptr %y
+ %r = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %v0, <8 x float> %v1)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+define void @maxnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: maxnum_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 3
+; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT: fmax.d $fa2, $fa3, $fa2
+; CHECK-NEXT: xvpickve.d $xr3, $xr0, 2
+; CHECK-NEXT: xvpickve.d $xr4, $xr1, 2
+; CHECK-NEXT: fmax.d $fa3, $fa4, $fa3
+; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1
+; CHECK-NEXT: xvpickve.d $xr4, $xr1, 1
+; CHECK-NEXT: fmax.d $fa2, $fa4, $fa2
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr1, 0
+; CHECK-NEXT: fmax.d $fa0, $fa1, $fa0
+; CHECK-NEXT: vextrins.d $vr0, $vr2, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %x
+ %v1 = load <4 x double>, ptr %y
+ %r = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %v0, <4 x double> %v1)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
+declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
+declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
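The checks above show each @llvm.minnum / @llvm.maxnum lane being extracted
with xvpickve, reduced with a scalar fmin/fmax, and reinserted with vextrins.
As a rough scalar reference for what one lane is expected to compute
(IEEE-754-2008 minNum: a single NaN input yields the other operand, the same
quiet-NaN behavior as C's fminf), here is an illustrative C sketch; the name
minnum_ref is hypothetical and used only for exposition:

    #include <math.h>

    /* One lane of @llvm.minnum.v8f32: if exactly one input is NaN,
       the other input is returned; otherwise the smaller value.     */
    float minnum_ref(float a, float b) {
        if (isnan(a)) return b;
        if (isnan(b)) return a;
        return a < b ? a : b;   /* fminf(a, b) behaves the same way */
    }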
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
new file mode 100644
index 0000000..79407c3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
@@ -0,0 +1,308 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; ceilf
+define void @ceil_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrp.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrp.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.ceil.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; ceil
+define void @ceil_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrp.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrp.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.ceil.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; floorf
+define void @floor_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrm.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrm.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.floor.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; floor
+define void @floor_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrm.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrm.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.floor.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; truncf
+define void @trunc_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrz.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrz.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.trunc.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; trunc
+define void @trunc_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrz.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrz.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.trunc.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+;; roundevenf
+define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
+; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrne.s $vr2, $vr2
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
+; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
+; CHECK-NEXT: vfrintrne.s $vr3, $vr3
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
+; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.s $vr1, $vr1
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.s $vr0, $vr0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %r = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %v0)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+;; roundeven
+define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
+; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
+; CHECK-NEXT: vfrintrne.d $vr2, $vr2
+; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
+; CHECK-NEXT: vfrintrne.d $vr1, $vr1
+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.d $vr0, $vr0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %r = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %v0)
+ store <4 x double> %r, ptr %res
+ ret void
+}
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+declare <8 x float> @llvm.floor.v8f32(<8 x float>)
+declare <4 x double> @llvm.floor.v4f64(<4 x double>)
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
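Each rounding intrinsic above is scalarized per lane onto the vfrint variant
whose suffix names the rounding direction: ceil pairs with vfrintrp (toward
+inf), floor with vfrintrm (toward -inf), trunc with vfrintrz (toward zero),
and roundeven with vfrintrne (to nearest, ties to even). A scalar C sketch of
the four per-lane operations, assuming the default FE_TONEAREST environment
for the roundeven case:

    #include <math.h>

    void round_lane(float x, float out[4]) {
        out[0] = ceilf(x);      /* vfrintrp.s: round toward +inf */
        out[1] = floorf(x);     /* vfrintrm.s: round toward -inf */
        out[2] = truncf(x);     /* vfrintrz.s: round toward zero */
        out[3] = nearbyintf(x); /* vfrintrne.s: ties to even, under the
                                   default-rounding-mode assumption   */
    }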
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
new file mode 100644
index 0000000..5c5c199
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -0,0 +1,321 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %shr = ashr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %shr = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %shr = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %shr = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %shr = lshr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shr = ashr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = ashr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = ashr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = ashr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %add = add <32 x i8> %va, %vb
+ %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shr = lshr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %add = add <16 x i16> %va, %vb
+ %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %add = add <8 x i32> %va, %vb
+ %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %shr, ptr %res
+ ret void
+}
+
+define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %add = add <4 x i64> %va, %vb
+ %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %shr, ptr %res
+ ret void
+}
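These tests feed the backend the plain averaging idioms, with the sum allowed
to wrap exactly as the IR add does; the 8/16/32-bit cases (and the 64-bit
cases on LA64) select xvavg/xvavgr directly, while the LA32 64-bit cases fall
back to the explicit add-then-shift visible in the checks. A per-lane C
sketch of the two unsigned byte patterns (the signed variants are the same
shape with an arithmetic shift):

    #include <stdint.h>

    /* (a + b) >> 1, the sum wrapping at 8 bits like the IR add.   */
    uint8_t avg_floor_u8(uint8_t a, uint8_t b) {
        return (uint8_t)(a + b) >> 1;
    }

    /* (a + b + 1) >> 1, the rounding form matched to xvavgr.bu.   */
    uint8_t avg_round_u8(uint8_t a, uint8_t b) {
        return (uint8_t)(a + b + 1) >> 1;
    }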
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
new file mode 100644
index 0000000..c82adcb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
@@ -0,0 +1,379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = sext <32 x i8> %va to <32 x i16>
+ %eb = sext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = sext <16 x i16> %va to <16 x i32>
+ %eb = sext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = sext <8 x i32> %va to <8 x i64>
+ %eb = sext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = sext <4 x i64> %va to <4 x i128>
+ %eb = sext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = zext <32 x i8> %va to <32 x i16>
+ %eb = zext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = zext <16 x i16> %va to <16 x i32>
+ %eb = zext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = zext <8 x i32> %va to <8 x i64>
+ %eb = zext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = zext <4 x i64> %va to <4 x i128>
+ %eb = zext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = sext <32 x i8> %va to <32 x i16>
+ %eb = sext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = sext <16 x i16> %va to <16 x i32>
+ %eb = sext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = sext <8 x i32> %va to <8 x i64>
+ %eb = sext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = sext <4 x i64> %va to <4 x i128>
+ %eb = sext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %vb = load <32 x i8>, ptr %b
+ %ea = zext <32 x i8> %va to <32 x i16>
+ %eb = zext <32 x i8> %vb to <32 x i16>
+ %add = add <32 x i16> %ea, %eb
+ %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %r = trunc <32 x i16> %shr to <32 x i8>
+ store <32 x i8> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i16>, ptr %a
+ %vb = load <16 x i16>, ptr %b
+ %ea = zext <16 x i16> %va to <16 x i32>
+ %eb = zext <16 x i16> %vb to <16 x i32>
+ %add = add <16 x i32> %ea, %eb
+ %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = trunc <16 x i32> %shr to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <8 x i32>, ptr %a
+ %vb = load <8 x i32>, ptr %b
+ %ea = zext <8 x i32> %va to <8 x i64>
+ %eb = zext <8 x i32> %vb to <8 x i64>
+ %add = add <8 x i64> %ea, %eb
+ %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %r = trunc <8 x i64> %shr to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <4 x i64>, ptr %a
+ %vb = load <4 x i64>, ptr %b
+ %ea = zext <4 x i64> %va to <4 x i128>
+ %eb = zext <4 x i64> %vb to <4 x i128>
+ %add = add <4 x i128> %ea, %eb
+ %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+ %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+ %r = trunc <4 x i128> %shr to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
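Unlike avg.ll, these tests widen before adding, so the average must be
computed without losing the carry; the checks show the backend emitting the
classic overflow-free identities (as in Hacker's Delight):
floor((a + b) / 2) = (a & b) + ((a ^ b) >> 1) and
ceil((a + b) / 2) = (a | b) - ((a ^ b) >> 1), using a logical shift (xvsrli)
for the unsigned forms and an arithmetic shift (xvsrai) for the signed ones.
A C sketch of the unsigned byte case:

    #include <stdint.h>

    /* floor((a + b) / 2) without widening: the carry lives in a & b. */
    uint8_t avg_floor_nowrap(uint8_t a, uint8_t b) {
        return (a & b) + ((a ^ b) >> 1);
    }

    /* ceil((a + b) / 2): a | b over-counts by the shifted xor bits.  */
    uint8_t avg_ceil_nowrap(uint8_t a, uint8_t b) {
        return (a | b) - ((a ^ b) >> 1);
    }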
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
index 68f2e3a..6b5f575 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
@@ -1,166 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
declare <8 x float> @llvm.log2.v8f32(<8 x float>)
declare <4 x double> @llvm.log2.v4f64(<4 x double>)
define void @flog2_v8f32(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v8f32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -128
-; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 5
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 4
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 1
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 0
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 2
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 32
-; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 3
-; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT: bl log2f
-; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT: vextrins.w $vr1, $vr0, 48
-; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: xvst $xr1, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 128
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v8f32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -128
-; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 5
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 4
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 1
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 2
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 32
-; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.w $xr0, $xr0, 3
-; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
-; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA64-NEXT: vextrins.w $vr1, $vr0, 48
-; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: xvst $xr1, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 128
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvflogb.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <8 x float>, ptr %a
%r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v)
@@ -169,93 +20,12 @@ entry:
}
define void @flog2_v4f64(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v4f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -112
-; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill
-; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
-; LA32-NEXT: move $fp, $a0
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 3
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 1
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA32-NEXT: xvpickve.d $xr0, $xr0, 0
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA32-NEXT: bl log2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT: vextrins.d $vr0, $vr1, 16
-; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: xvst $xr0, $fp, 0
-; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 112
-; LA32-NEXT: ret
-;
-; LA64-LABEL: flog2_v4f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -112
-; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
-; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
-; LA64-NEXT: move $fp, $a0
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 3
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 2
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 1
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; LA64-NEXT: xvpickve.d $xr0, $xr0, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; LA64-NEXT: pcaddu18i $ra, %call36(log2)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
-; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: xvst $xr0, $fp, 0
-; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload
-; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 112
-; LA64-NEXT: ret
+; CHECK-LABEL: flog2_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvflogb.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v = load <4 x double>, ptr %a
%r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v)