Diffstat (limited to 'llvm/test/CodeGen/LoongArch/lasx')
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll                    160
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll                   308
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll            321
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll  379
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll          258
5 files changed, 1182 insertions(+), 244 deletions(-)
| diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll new file mode 100644 index 0000000..48ec98c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @minnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind { +; CHECK-LABEL: minnum_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a2, 0 +; CHECK-NEXT:    xvld $xr1, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 5 +; CHECK-NEXT:    xvpickve.w $xr3, $xr1, 5 +; CHECK-NEXT:    fmin.s $fa2, $fa3, $fa2 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 4 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 4 +; CHECK-NEXT:    fmin.s $fa3, $fa4, $fa3 +; CHECK-NEXT:    vextrins.w $vr3, $vr2, 16 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 6 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 6 +; CHECK-NEXT:    fmin.s $fa2, $fa4, $fa2 +; CHECK-NEXT:    vextrins.w $vr3, $vr2, 32 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 7 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 7 +; CHECK-NEXT:    fmin.s $fa2, $fa4, $fa2 +; CHECK-NEXT:    vextrins.w $vr3, $vr2, 48 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 1 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 1 +; CHECK-NEXT:    fmin.s $fa2, $fa4, $fa2 +; CHECK-NEXT:    xvpickve.w $xr4, $xr0, 0 +; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 0 +; CHECK-NEXT:    fmin.s $fa4, $fa5, $fa4 +; CHECK-NEXT:    vextrins.w $vr4, $vr2, 16 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 2 +; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 2 +; CHECK-NEXT:    fmin.s $fa2, $fa5, $fa2 +; CHECK-NEXT:    vextrins.w $vr4, $vr2, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    xvpickve.w $xr1, $xr1, 3 +; CHECK-NEXT:    fmin.s $fa0, $fa1, $fa0 +; CHECK-NEXT:    vextrins.w $vr4, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr4, $xr3, 2 +; CHECK-NEXT:    xvst $xr4, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %x +  %v1 = load <8 x float>, ptr %y +  %r = call <8 x float> @llvm.minnum.v8f32(<8 x float> %v0, <8 x float> %v1) +  store <8 x float> %r, ptr %res +  ret void +} + +define void @minnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind { +; CHECK-LABEL: minnum_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a2, 0 +; CHECK-NEXT:    xvld $xr1, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 3 +; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3 +; CHECK-NEXT:    fmin.d $fa2, $fa3, $fa2 +; CHECK-NEXT:    xvpickve.d $xr3, $xr0, 2 +; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 2 +; CHECK-NEXT:    fmin.d $fa3, $fa4, $fa3 +; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 1 +; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 1 +; CHECK-NEXT:    fmin.d $fa2, $fa4, $fa2 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 0 +; CHECK-NEXT:    fmin.d $fa0, $fa1, $fa0 +; CHECK-NEXT:    vextrins.d $vr0, $vr2, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %x +  %v1 = load <4 x double>, ptr %y +  %r = call <4 x double> @llvm.minnum.v4f64(<4 x double> %v0, <4 x double> %v1) +  store <4 x double> %r, ptr %res +  ret void +} + +define void @maxnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind { +; CHECK-LABEL: maxnum_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a2, 0 +; 
CHECK-NEXT:    xvld $xr1, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 5 +; CHECK-NEXT:    xvpickve.w $xr3, $xr1, 5 +; CHECK-NEXT:    fmax.s $fa2, $fa3, $fa2 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 4 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 4 +; CHECK-NEXT:    fmax.s $fa3, $fa4, $fa3 +; CHECK-NEXT:    vextrins.w $vr3, $vr2, 16 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 6 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 6 +; CHECK-NEXT:    fmax.s $fa2, $fa4, $fa2 +; CHECK-NEXT:    vextrins.w $vr3, $vr2, 32 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 7 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 7 +; CHECK-NEXT:    fmax.s $fa2, $fa4, $fa2 +; CHECK-NEXT:    vextrins.w $vr3, $vr2, 48 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 1 +; CHECK-NEXT:    xvpickve.w $xr4, $xr1, 1 +; CHECK-NEXT:    fmax.s $fa2, $fa4, $fa2 +; CHECK-NEXT:    xvpickve.w $xr4, $xr0, 0 +; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 0 +; CHECK-NEXT:    fmax.s $fa4, $fa5, $fa4 +; CHECK-NEXT:    vextrins.w $vr4, $vr2, 16 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 2 +; CHECK-NEXT:    xvpickve.w $xr5, $xr1, 2 +; CHECK-NEXT:    fmax.s $fa2, $fa5, $fa2 +; CHECK-NEXT:    vextrins.w $vr4, $vr2, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    xvpickve.w $xr1, $xr1, 3 +; CHECK-NEXT:    fmax.s $fa0, $fa1, $fa0 +; CHECK-NEXT:    vextrins.w $vr4, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr4, $xr3, 2 +; CHECK-NEXT:    xvst $xr4, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %x +  %v1 = load <8 x float>, ptr %y +  %r = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %v0, <8 x float> %v1) +  store <8 x float> %r, ptr %res +  ret void +} + +define void @maxnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind { +; CHECK-LABEL: maxnum_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a2, 0 +; CHECK-NEXT:    xvld $xr1, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 3 +; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3 +; CHECK-NEXT:    fmax.d $fa2, $fa3, $fa2 +; CHECK-NEXT:    xvpickve.d $xr3, $xr0, 2 +; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 2 +; CHECK-NEXT:    fmax.d $fa3, $fa4, $fa3 +; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 1 +; CHECK-NEXT:    xvpickve.d $xr4, $xr1, 1 +; CHECK-NEXT:    fmax.d $fa2, $fa4, $fa2 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 0 +; CHECK-NEXT:    fmax.d $fa0, $fa1, $fa0 +; CHECK-NEXT:    vextrins.d $vr0, $vr2, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %x +  %v1 = load <4 x double>, ptr %y +  %r = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %v0, <4 x double> %v1) +  store <4 x double> %r, ptr %res +  ret void +} + +declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) +declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) +declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) +declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll new file mode 100644 index 0000000..79407c3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll @@ -0,0 +1,308 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +;; ceilf +define void @ceil_v8f32(ptr %res, ptr %a0) nounwind 
{ +; CHECK-LABEL: ceil_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrp.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrp.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrp.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.ceil.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; ceil +define void @ceil_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ceil_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrp.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrp.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrp.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.ceil.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +;; floorf +define void @floor_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: floor_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrm.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s 
$vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrm.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrm.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.floor.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; floor +define void @floor_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: floor_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrm.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrm.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrm.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.floor.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +;; truncf +define void @trunc_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: trunc_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrz.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrz.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrz.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.trunc.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; trunc +define void @trunc_v4f64(ptr %res, ptr %a0) nounwind { +; 
CHECK-LABEL: trunc_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrz.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrz.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrz.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.trunc.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +;; roundevenf +define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: roundeven_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 5 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 4 +; CHECK-NEXT:    vreplvei.w $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrne.s $vr2, $vr2 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 6 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 7 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    xvpickve.w $xr3, $xr0, 0 +; CHECK-NEXT:    vreplvei.w $vr3, $vr3, 0 +; CHECK-NEXT:    vfrintrne.s $vr3, $vr3 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 16 +; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 2 +; CHECK-NEXT:    vreplvei.w $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.s $vr1, $vr1 +; CHECK-NEXT:    vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT:    xvpickve.w $xr0, $xr0, 3 +; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrne.s $vr0, $vr0 +; CHECK-NEXT:    vextrins.w $vr3, $vr0, 48 +; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2 +; CHECK-NEXT:    xvst $xr3, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = load <8 x float>, ptr %a0 +  %r = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %v0) +  store <8 x float> %r, ptr %res +  ret void +} + +;; roundeven +define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: roundeven_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 3 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr2, $xr0, 2 +; CHECK-NEXT:    vreplvei.d $vr2, $vr2, 0 +; CHECK-NEXT:    vfrintrne.d $vr2, $vr2 +; CHECK-NEXT:    vextrins.d $vr2, $vr1, 16 +; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1 +; CHECK-NEXT:    vreplvei.d $vr1, $vr1, 0 +; CHECK-NEXT:    vfrintrne.d $vr1, $vr1 +; CHECK-NEXT:    xvpickve.d $xr0, $xr0, 0 +; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT:    vfrintrne.d $vr0, $vr0 +; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %v0 = 
load <4 x double>, ptr %a0 +  %r = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %v0) +  store <4 x double> %r, ptr %res +  ret void +} + +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) +declare <8 x float> @llvm.floor.v8f32(<8 x float>) +declare <4 x double> @llvm.floor.v4f64(<4 x double>) +declare <8 x float> @llvm.trunc.v8f32(<8 x float>) +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) +declare <8 x float> @llvm.roundeven.v8f32(<8 x float>) +declare <4 x double> @llvm.roundeven.v4f64(<4 x double>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll new file mode 100644 index 0000000..5c5c199 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll @@ -0,0 +1,321 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavg.b $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %add = add <32 x i8> %va, %vb +  %shr = ashr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +  store <32 x i8> %shr, ptr %res +  ret void +} + +define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_h: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavg.h $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %add = add <16 x i16> %va, %vb +  %shr = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  store <16 x i16> %shr, ptr %res +  ret void +} + +define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavg.w $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %add = add <8 x i32> %va, %vb +  %shr = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  store <8 x i32> %shr, ptr %res +  ret void +} + +define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind { +; LA32-LABEL: xvavg_d: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvsrai.d $xr0, $xr0, 1 +; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavg_d: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavg.d $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x 
i64>, ptr %b +  %add = add <4 x i64> %va, %vb +  %shr = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  store <4 x i64> %shr, ptr %res +  ret void +} + +define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavg.bu $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %add = add <32 x i8> %va, %vb +  %shr = lshr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +  store <32 x i8> %shr, ptr %res +  ret void +} + +define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavg.hu $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %add = add <16 x i16> %va, %vb +  %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  store <16 x i16> %shr, ptr %res +  ret void +} + +define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavg.wu $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %add = add <8 x i32> %va, %vb +  %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  store <8 x i32> %shr, ptr %res +  ret void +} + +define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind { +; LA32-LABEL: xvavg_du: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavg_du: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavg.du $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %add = add <4 x i64> %va, %vb +  %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  store <4 x i64> %shr, ptr %res +  ret void +} + +define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavgr.b $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %add = add <32 x i8> %va, %vb +  %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +  %shr = ashr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 
1, i8 1, i8 1, i8 1, i8 1, i8 1> +  store <32 x i8> %shr, ptr %res +  ret void +} + +define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_h: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavgr.h $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %add = add <16 x i16> %va, %vb +  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = ashr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  store <16 x i16> %shr, ptr %res +  ret void +} + +define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavgr.w $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %add = add <8 x i32> %va, %vb +  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = ashr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  store <8 x i32> %shr, ptr %res +  ret void +} + +define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind { +; LA32-LABEL: xvavgr_d: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT:    xvsrai.d $xr0, $xr0, 1 +; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavgr_d: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavgr.d $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %add = add <4 x i64> %va, %vb +  %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  %shr = ashr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1> +  store <4 x i64> %shr, ptr %res +  ret void +} + +define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavgr.bu $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %add = add <32 x i8> %va, %vb +  %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +  %shr = lshr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +  store <32 x i8> %shr, ptr %res +  ret void +} + +define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavgr.hu $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 
x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %add = add <16 x i16> %va, %vb +  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  store <16 x i16> %shr, ptr %res +  ret void +} + +define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvavgr.wu $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %add = add <8 x i32> %va, %vb +  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  store <8 x i32> %shr, ptr %res +  ret void +} + +define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind { +; LA32-LABEL: xvavgr_du: +; LA32:       # %bb.0: # %entry +; LA32-NEXT:    xvld $xr0, $a1, 0 +; LA32-NEXT:    xvld $xr1, $a2, 0 +; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT:    xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT:    xvst $xr0, $a0, 0 +; LA32-NEXT:    ret +; +; LA64-LABEL: xvavgr_du: +; LA64:       # %bb.0: # %entry +; LA64-NEXT:    xvld $xr0, $a1, 0 +; LA64-NEXT:    xvld $xr1, $a2, 0 +; LA64-NEXT:    xvavgr.du $xr0, $xr0, $xr1 +; LA64-NEXT:    xvst $xr0, $a0, 0 +; LA64-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %add = add <4 x i64> %va, %vb +  %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> +  %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1> +  store <4 x i64> %shr, ptr %res +  ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll new file mode 100644 index 0000000..c82adcb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll @@ -0,0 +1,379 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = sext <32 x i8> %va to <32 x i16> +  %eb = sext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_h: +; CHECK:       # %bb.0: # %entry +; 
CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = sext <16 x i16> %va to <16 x i32> +  %eb = sext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = sext <8 x i32> %va to <8 x i64> +  %eb = sext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_d: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = sext <4 x i64> %va to <4 x i128> +  %eb = sext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} + +define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = zext <32 x i8> %va to <32 x i16> +  %eb = zext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 
1 +; CHECK-NEXT:    xvadd.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = zext <16 x i16> %va to <16 x i32> +  %eb = zext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = zext <8 x i32> %va to <8 x i64> +  %eb = zext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavg_du: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvadd.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = zext <4 x i64> %va to <4 x i128> +  %eb = zext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} + +define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_b: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = sext <32 x i8> %va to <32 x i16> +  %eb = sext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_h: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:  
  xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = sext <16 x i16> %va to <16 x i32> +  %eb = sext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_w: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = sext <8 x i32> %va to <8 x i64> +  %eb = sext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_d: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = sext <4 x i64> %va to <4 x i128> +  %eb = sext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} + +define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_bu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.b $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <32 x i8>, ptr %a +  %vb = load <32 x i8>, ptr %b +  %ea = zext <32 x i8> %va to <32 x i16> +  %eb = zext <32 x i8> %vb to <32 x i16> +  %add = add <32 x i16> %ea, %eb +  %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> +  %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, 
i16 1, i16 1, i16 1, i16 1> +  %r = trunc <32 x i16> %shr to <32 x i8> +  store <32 x i8> %r, ptr %res +  ret void +} + +define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_hu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.h $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <16 x i16>, ptr %a +  %vb = load <16 x i16>, ptr %b +  %ea = zext <16 x i16> %va to <16 x i32> +  %eb = zext <16 x i16> %vb to <16 x i32> +  %add = add <16 x i32> %ea, %eb +  %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +  %r = trunc <16 x i32> %shr to <16 x i16> +  store <16 x i16> %r, ptr %res +  ret void +} + +define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_wu: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.w $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <8 x i32>, ptr %a +  %vb = load <8 x i32>, ptr %b +  %ea = zext <8 x i32> %va to <8 x i64> +  %eb = zext <8 x i32> %vb to <8 x i64> +  %add = add <8 x i64> %ea, %eb +  %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> +  %r = trunc <8 x i64> %shr to <8 x i32> +  store <8 x i32> %r, ptr %res +  ret void +} + +define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind { +; CHECK-LABEL: xvavgr_du: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvld $xr1, $a2, 0 +; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1 +; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1 +; CHECK-NEXT:    xvsub.d $xr0, $xr2, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret +entry: +  %va = load <4 x i64>, ptr %a +  %vb = load <4 x i64>, ptr %b +  %ea = zext <4 x i64> %va to <4 x i128> +  %eb = zext <4 x i64> %vb to <4 x i128> +  %add = add <4 x i128> %ea, %eb +  %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1> +  %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1> +  %r = trunc <4 x i128> %shr to <4 x i64> +  store <4 x i64> %r, ptr %res +  ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll index 68f2e3a..6b5f575 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll @@ -1,166 +1,17 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s 
 declare <8 x float> @llvm.log2.v8f32(<8 x float>)  declare <4 x double> @llvm.log2.v4f64(<4 x double>)  define void @flog2_v8f32(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v8f32: -; LA32:       # %bb.0: # %entry -; LA32-NEXT:    addi.w $sp, $sp, -128 -; LA32-NEXT:    st.w $ra, $sp, 124 # 4-byte Folded Spill -; LA32-NEXT:    st.w $fp, $sp, 120 # 4-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $a1, 0 -; LA32-NEXT:    xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA32-NEXT:    move $fp, $a0 -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 5 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 4 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA32-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 6 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 7 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 1 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 0 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA32-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 2 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA32-NEXT:    xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.w $xr0, $xr0, 3 -; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT:    bl log2f -; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA32-NEXT:    xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT:    xvpermi.q $xr1, $xr0, 2 -; LA32-NEXT:    xvst $xr1, $fp, 0 -; LA32-NEXT:    ld.w $fp, $sp, 120 # 4-byte Folded 
Reload -; LA32-NEXT:    ld.w $ra, $sp, 124 # 4-byte Folded Reload -; LA32-NEXT:    addi.w $sp, $sp, 128 -; LA32-NEXT:    ret -; -; LA64-LABEL: flog2_v8f32: -; LA64:       # %bb.0: # %entry -; LA64-NEXT:    addi.d $sp, $sp, -128 -; LA64-NEXT:    st.d $ra, $sp, 120 # 8-byte Folded Spill -; LA64-NEXT:    st.d $fp, $sp, 112 # 8-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $a1, 0 -; LA64-NEXT:    xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA64-NEXT:    move $fp, $a0 -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 5 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 4 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA64-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 6 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA64-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 7 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA64-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 1 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr0, $vr1, 16 -; LA64-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 2 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 32 -; LA64-NEXT:    xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.w $xr0, $xr0, 3 -; LA64-NEXT:    # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT:    
pcaddu18i $ra, %call36(log2f) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT:    vextrins.w $vr1, $vr0, 48 -; LA64-NEXT:    xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT:    xvpermi.q $xr1, $xr0, 2 -; LA64-NEXT:    xvst $xr1, $fp, 0 -; LA64-NEXT:    ld.d $fp, $sp, 112 # 8-byte Folded Reload -; LA64-NEXT:    ld.d $ra, $sp, 120 # 8-byte Folded Reload -; LA64-NEXT:    addi.d $sp, $sp, 128 -; LA64-NEXT:    ret +; CHECK-LABEL: flog2_v8f32: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvflogb.s $xr0, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret  entry:    %v = load <8 x float>, ptr %a    %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v) @@ -169,93 +20,12 @@ entry:  }  define void @flog2_v4f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f64: -; LA32:       # %bb.0: # %entry -; LA32-NEXT:    addi.w $sp, $sp, -112 -; LA32-NEXT:    st.w $ra, $sp, 108 # 4-byte Folded Spill -; LA32-NEXT:    st.w $fp, $sp, 104 # 4-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $a1, 0 -; LA32-NEXT:    xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA32-NEXT:    move $fp, $a0 -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 3 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA32-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 1 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT:    xvpickve.d $xr0, $xr0, 0 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT:    bl log2 -; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA32-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA32-NEXT:    xvpermi.q $xr0, $xr1, 2 -; LA32-NEXT:    xvst $xr0, $fp, 0 -; LA32-NEXT:    ld.w $fp, $sp, 104 # 4-byte Folded Reload -; LA32-NEXT:    ld.w $ra, $sp, 108 # 4-byte Folded Reload -; LA32-NEXT:    addi.w $sp, $sp, 112 -; LA32-NEXT:    ret -; -; LA64-LABEL: flog2_v4f64: -; LA64:       # %bb.0: # %entry -; LA64-NEXT:    addi.d $sp, $sp, -112 -; LA64-NEXT:    st.d $ra, $sp, 104 # 8-byte Folded Spill -; LA64-NEXT:    st.d $fp, $sp, 96 # 8-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $a1, 0 -; LA64-NEXT:    xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA64-NEXT:    move $fp, $a0 -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 3 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, 
$sp, 64 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 2 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA64-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 1 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT:    xvpickve.d $xr0, $xr0, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT:    pcaddu18i $ra, %call36(log2) -; LA64-NEXT:    jirl $ra, $ra, 0 -; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT:    vextrins.d $vr0, $vr1, 16 -; LA64-NEXT:    xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA64-NEXT:    xvpermi.q $xr0, $xr1, 2 -; LA64-NEXT:    xvst $xr0, $fp, 0 -; LA64-NEXT:    ld.d $fp, $sp, 96 # 8-byte Folded Reload -; LA64-NEXT:    ld.d $ra, $sp, 104 # 8-byte Folded Reload -; LA64-NEXT:    addi.d $sp, $sp, 112 -; LA64-NEXT:    ret +; CHECK-LABEL: flog2_v4f64: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    xvld $xr0, $a1, 0 +; CHECK-NEXT:    xvflogb.d $xr0, $xr0 +; CHECK-NEXT:    xvst $xr0, $a0, 0 +; CHECK-NEXT:    ret  entry:    %v = load <4 x double>, ptr %a    %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v) | 
