Diffstat (limited to 'llvm/test/CodeGen/LoongArch/lasx')
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll              | 398
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll                   |  63
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll                   |  72
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll                  | 132
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll         | 303
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll           | 321
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll | 379
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll         | 258
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll                    | 248
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll             |  47
10 files changed, 1975 insertions, 246 deletions
diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
index 6754959..5ed49d9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
 
 define void @and_not_combine_v32i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CHECK-LABEL: and_not_combine_v32i8:
@@ -85,3 +85,397 @@ entry:
   store <4 x i64> %and, ptr %res
   ret void
 }
+
+define void @pre_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %a
+  %b.not = xor i8 %b, -1
+  %b.not.ele = insertelement <32 x i8> poison, i8 %b.not, i64 0
+  %v1.not = shufflevector <32 x i8> %b.not.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+  %v0.not = xor <32 x i8> %v0, splat (i8 -1)
+  %and = and <32 x i8> %v0.not, %v1.not
+  store <32 x i8> %and, ptr %res
+  ret void
+}
+
+define void @post_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %a
+  %b.ele = insertelement <32 x i8> poison, i8 %b, i64 0
+  %v1 = shufflevector <32 x i8> %b.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+  %v0.not = xor <32 x i8> %v0, splat (i8 -1)
+  %v1.not = xor <32 x i8> %v1, splat (i8 -1)
+  %and = and <32 x i8> %v0.not, %v1.not
+  store <32 x i8> %and, ptr %res
+  ret void
+}
+
+define void @pre_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %a
+  %b.not = xor i16 %b, -1
+  %b.not.ele = insertelement <16 x i16> poison, i16 %b.not, i64 0
+  %v1.not = shufflevector <16 x i16> %b.not.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v0.not = xor <16 x i16> %v0, splat (i16 -1)
+  %and = and <16 x i16> %v0.not, %v1.not
+  store <16 x i16> %and, ptr %res
+  ret void
+}
+
+define void @post_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %a
+  %b.ele = insertelement <16 x i16> poison, i16 %b, i64 0
+  %v1 = shufflevector <16 x i16> %b.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v0.not = xor <16 x i16> %v0, splat (i16 -1)
+  %v1.not = xor <16 x i16> %v1, splat (i16 -1)
+  %and = and <16 x i16> %v0.not, %v1.not
+  store <16 x i16> %and, ptr %res
+  ret void
+}
+
+define void @pre_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %a
+  %b.not = xor i32 %b, -1
+  %b.not.ele = insertelement <8 x i32> poison, i32 %b.not, i64 0
+  %v1.not = shufflevector <8 x i32> %b.not.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v0.not = xor <8 x i32> %v0, splat (i32 -1)
+  %and = and <8 x i32> %v0.not, %v1.not
+  store <8 x i32> %and, ptr %res
+  ret void
+}
+
+define void @post_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %a
+  %b.ele = insertelement <8 x i32> poison, i32 %b, i64 0
+  %v1 = shufflevector <8 x i32> %b.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v0.not = xor <8 x i32> %v0, splat (i32 -1)
+  %v1.not = xor <8 x i32> %v1, splat (i32 -1)
+  %and = and <8 x i32> %v0.not, %v1.not
+  store <8 x i32> %and, ptr %res
+  ret void
+}
+
+define void @pre_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind {
+; LA32-LABEL: pre_not_and_not_combine_v4i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    nor $a1, $a3, $zero
+; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 1
+; LA32-NEXT:    xvreplve0.d $xr1, $xr1
+; LA32-NEXT:    xvandn.v $xr0, $xr0, $xr1
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: pre_not_and_not_combine_v4i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %a
+  %b.not = xor i64 %b, -1
+  %b.not.ele = insertelement <4 x i64> poison, i64 %b.not, i64 0
+  %v1.not = shufflevector <4 x i64> %b.not.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v0.not = xor <4 x i64> %v0, splat (i64 -1)
+  %and = and <4 x i64> %v0.not, %v1.not
+  store <4 x i64> %and, ptr %res
+  ret void
+}
+
+define void @post_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind {
+; LA32-LABEL: post_not_and_not_combine_v4i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT:    xvreplve0.d $xr1, $xr1
+; LA32-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: post_not_and_not_combine_v4i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %a
+  %b.ele = insertelement <4 x i64> poison, i64 %b, i64 0
+  %v1 = shufflevector <4 x i64> %b.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v0.not = xor <4 x i64> %v0, splat (i64 -1)
+  %v1.not = xor <4 x i64> %v1, splat (i64 -1)
+  %and = and <4 x i64> %v0.not, %v1.not
+  store <4 x i64> %and, ptr %res
+  ret void
+}
+
+define void @and_not_combine_splatimm_v32i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvnori.b $xr0, $xr0, 3
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %a0
+  %and = and <32 x i8> %v0, splat (i8 -4)
+  %xor = xor <32 x i8> %and, splat (i8 -4)
+  store <32 x i8> %xor, ptr %res
+  ret void
+}
+
+define void @and_not_combine_splatimm_v16i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvrepli.h $xr1, -4
+; CHECK-NEXT:    xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %a0
+  %and = and <16 x i16> %v0, splat (i16 -4)
+  %xor = xor <16 x i16> %and, splat (i16 -4)
+  store <16 x i16> %xor, ptr %res
+  ret void
+}
+
+define void @and_not_combine_splatimm_v8i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvrepli.w $xr1, -4
+; CHECK-NEXT:    xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %a0
+  %and = and <8 x i32> %v0, splat (i32 -4)
+  %xor = xor <8 x i32> %and, splat (i32 -4)
+  store <8 x i32> %xor, ptr %res
+  ret void
+}
+
+define void @and_not_combine_splatimm_v4i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvrepli.d $xr1, -4
+; CHECK-NEXT:    xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i64>, ptr %a0
+  %and = and <4 x i64> %v0, splat (i64 -4)
+  %xor = xor <4 x i64> %and, splat (i64 -4)
+  store <4 x i64> %xor, ptr %res
+  ret void
+}
+
+define void @and_or_not_combine_v32i8(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvld $xr2, $a1, 0
+; CHECK-NEXT:    xvseq.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvseq.b $xr1, $xr1, $xr2
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvnori.b $xr0, $xr0, 251
+; CHECK-NEXT:    xvst $xr0, $a3, 0
+; CHECK-NEXT:    ret
+  %a = load <32 x i8>, ptr %pa
+  %b = load <32 x i8>, ptr %pb
+  %v = load <32 x i8>, ptr %pv
+  %ca = icmp ne <32 x i8> %v, %a
+  %cb = icmp ne <32 x i8> %v, %b
+  %or = or <32 x i1> %ca, %cb
+  %ext = sext <32 x i1> %or to <32 x i8>
+  %and = and <32 x i8> %ext, splat (i8 4)
+  store <32 x i8> %and, ptr %dst
+  ret void
+}
+
+define void @and_or_not_combine_v16i16(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvld $xr2, $a1, 0
+; CHECK-NEXT:    xvseq.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvseq.h $xr1, $xr1, $xr2
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvrepli.h $xr1, 4
+; CHECK-NEXT:    xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a3, 0
+; CHECK-NEXT:    ret
+  %a = load <16 x i16>, ptr %pa
+  %b = load <16 x i16>, ptr %pb
+  %v = load <16 x i16>, ptr %pv
+  %ca = icmp ne <16 x i16> %v, %a
+  %cb = icmp ne <16 x i16> %v, %b
+  %or = or <16 x i1> %ca, %cb
+  %ext = sext <16 x i1> %or to <16 x i16>
+  %and = and <16 x i16> %ext, splat (i16 4)
+  store <16 x i16> %and, ptr %dst
+  ret void
+}
+
+define void @and_or_not_combine_v8i32(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvld $xr2, $a1, 0
+; CHECK-NEXT:    xvseq.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvseq.w $xr1, $xr1, $xr2
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvrepli.w $xr1, 4
+; CHECK-NEXT:    xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a3, 0
+; CHECK-NEXT:    ret
+  %a = load <8 x i32>, ptr %pa
+  %b = load <8 x i32>, ptr %pb
+  %v = load <8 x i32>, ptr %pv
+  %ca = icmp ne <8 x i32> %v, %a
+  %cb = icmp ne <8 x i32> %v, %b
+  %or = or <8 x i1> %ca, %cb
+  %ext = sext <8 x i1> %or to <8 x i32>
+  %and = and <8 x i32> %ext, splat (i32 4)
+  store <8 x i32> %and, ptr %dst
+  ret void
+}
+
+define void @and_or_not_combine_v4i64(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvld $xr2, $a1, 0
+; CHECK-NEXT:    xvseq.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvseq.d $xr1, $xr1, $xr2
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvrepli.d $xr1, 4
+; CHECK-NEXT:    xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a3, 0
+; CHECK-NEXT:    ret
+  %a = load <4 x i64>, ptr %pa
+  %b = load <4 x i64>, ptr %pb
+  %v = load <4 x i64>, ptr %pv
+  %ca = icmp ne <4 x i64> %v, %a
+  %cb = icmp ne <4 x i64> %v, %b
+  %or = or <4 x i1> %ca, %cb
+  %ext = sext <4 x i1> %or to <4 x i64>
+  %and = and <4 x i64> %ext, splat (i64 4)
+  store <4 x i64> %and, ptr %dst
+  ret void
+}
+
+define void @and_extract_subvector_not_combine_v32i8(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vnori.b $vr0, $vr0, 251
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %a = load volatile <32 x i8>, ptr %pa
+  %a.not = xor <32 x i8> %a, splat (i8 -1)
+  %subv = shufflevector <32 x i8> %a.not, <32 x i8> poison,
+          <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+          i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %and = and <16 x i8> %subv, splat (i8 4)
+  store <16 x i8> %and, ptr %dst
+  ret void
+}
+
+define void @and_extract_subvector_not_combine_v16i16(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vrepli.h $vr1, 4
+; CHECK-NEXT:    vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %a = load volatile <16 x i16>, ptr %pa
+  %a.not = xor <16 x i16> %a, splat (i16 -1)
+  %subv = shufflevector <16 x i16> %a.not, <16 x i16> poison,
+          <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %and = and <8 x i16> %subv, splat (i16 4)
+  store <8 x i16> %and, ptr %dst
+  ret void
+}
+
+define void @and_extract_subvector_not_combine_v8i32(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vrepli.w $vr1, 4
+; CHECK-NEXT:    vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %a = load volatile <8 x i32>, ptr %pa
+  %a.not = xor <8 x i32> %a, splat (i32 -1)
+  %subv = shufflevector <8 x i32> %a.not, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %and = and <4 x i32> %subv, splat (i32 4)
+  store <4 x i32> %and, ptr %dst
+  ret void
+}
+
+define void @and_extract_subvector_not_combine_v4i64(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vrepli.d $vr1, 4
+; CHECK-NEXT:    vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %a = load volatile <4 x i64>, ptr %pa
+  %a.not = xor <4 x i64> %a, splat (i64 -1)
+  %subv = shufflevector <4 x i64> %a.not, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+  %and = and <2 x i64> %subv, splat (i64 4)
+  store <2 x i64> %and, ptr %dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
index ba2118f..b3155c9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
@@ -106,6 +106,69 @@ define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind {
   ret void
 }
 
+define void @not_ctlz_v32i8(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT:    xvclz.b $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <32 x i8>, ptr %src
+  %neg = xor <32 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %neg, i1 false)
+  store <32 x i8> %res, ptr %dst
+  ret void
+}
+
+define void @not_ctlz_v16i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvrepli.b $xr1, -1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvclz.h $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <16 x i16>, ptr %src
+  %neg = xor <16 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %neg, i1 false)
+  store <16 x i16> %res, ptr %dst
+  ret void
+}
+
+define void @not_ctlz_v8i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvrepli.b $xr1, -1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvclz.w $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <8 x i32>, ptr %src
+  %neg = xor <8 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %neg, i1 false)
+  store <8 x i32> %res, ptr %dst
+  ret void
+}
+
+define void @not_ctlz_v4i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvrepli.b $xr1, -1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvclz.d $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <4 x i64>, ptr %src
+  %neg = xor <4 x i64> %v, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %neg, i1 false)
+  store <4 x i64> %res, ptr %dst
+  ret void
+}
+
 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll
new file mode 100644
index 0000000..8e08e1e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @minnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: minnum_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfmin.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <8 x float>, ptr %x
+  %v1 = load <8 x float>, ptr %y
+  %r = call <8 x float> @llvm.minnum.v8f32(<8 x float> %v0, <8 x float> %v1)
+  store <8 x float> %r, ptr %res
+  ret void
+}
+
+define void @minnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: minnum_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfmin.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <4 x double>, ptr %x
+  %v1 = load <4 x double>, ptr %y
+  %r = call <4 x double> @llvm.minnum.v4f64(<4 x double> %v0, <4 x double> %v1)
+  store <4 x double> %r, ptr %res
+  ret void
+}
+
+define void @maxnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: maxnum_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfmax.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <8 x float>, ptr %x
+  %v1 = load <8 x float>, ptr %y
+  %r = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %v0, <8 x float> %v1)
+  store <8 x float> %r, ptr %res
+  ret void
+}
+
+define void @maxnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: maxnum_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfmax.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <4 x double>, ptr %x
+  %v1 = load <4 x double>, ptr %y
+  %r = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %v0, <4 x double> %v1)
+  store <4 x double> %r, ptr %res
+  ret void
+}
+
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
+declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
+declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
new file mode 100644
index 0000000..fa5f27e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; ceilf
+define void @ceil_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrp.s $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %r = call <8 x float> @llvm.ceil.v8f32(<8 x float> %v0)
+  store <8 x float> %r, ptr %res
+  ret void
+}
+
+;; ceil
+define void @ceil_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ceil_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrp.d $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <4 x double>, ptr %a0
+  %r = call <4 x double> @llvm.ceil.v4f64(<4 x double> %v0)
+  store <4 x double> %r, ptr %res
+  ret void
+}
+
+;; floorf
+define void @floor_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrm.s $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %r = call <8 x float> @llvm.floor.v8f32(<8 x float> %v0)
+  store <8 x float> %r, ptr %res
+  ret void
+}
+
+;; floor
+define void @floor_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: floor_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrm.d $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <4 x double>, ptr %a0
+  %r = call <4 x double> @llvm.floor.v4f64(<4 x double> %v0)
+  store <4 x double> %r, ptr %res
+  ret void
+}
+
+;; truncf
+define void @trunc_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrz.s $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %r = call <8 x float> @llvm.trunc.v8f32(<8 x float> %v0)
+  store <8 x float> %r, ptr %res
+  ret void
+}
+
+;; trunc
+define void @trunc_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: trunc_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrz.d $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <4 x double>, ptr %a0
+  %r = call <4 x double> @llvm.trunc.v4f64(<4 x double> %v0)
+  store <4 x double> %r, ptr %res
+  ret void
+}
+
+;; roundevenf
+define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrne.s $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %r = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %v0)
+  store <8 x float> %r, ptr %res
+  ret void
+}
+
+;; roundeven
+define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: roundeven_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvfrintrne.d $xr0, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v0 = load <4 x double>, ptr %a0
+  %r = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %v0)
+  store <4 x double> %r, ptr %res
+  ret void
+}
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
+declare <8 x float> @llvm.floor.v8f32(<8 x float>)
+declare <4 x double> @llvm.floor.v4f64(<4 x double>)
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
new file mode 100644
index 0000000..006713c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
@@ -0,0 +1,303 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
+
+define void @lasx_cast_128_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x float>, ptr %va
+  %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a)
+  store <8 x float> %b, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
+
+define void @lasx_cast_128_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x double>, ptr %va
+  %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a)
+  store <4 x double> %b, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
+
+define void @lasx_cast_128(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x i64>, ptr %va
+  %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a)
+  store <4 x i64> %b, ptr %vd
+  ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
+
+define void @lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x float>, ptr %va
+  %b = load <4 x float>, ptr %vb
+  %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b)
+  store <8 x float> %c, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
+
+define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x double>, ptr %va
+  %b = load <2 x double>, ptr %vb
+  %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b)
+  store <4 x double> %c, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)
+
+define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x i64>, ptr %va
+  %b = load <2 x i64>, ptr %vb
+  %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b)
+  store <4 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
+
+define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a)
+  store <4 x float> %c, ptr %vd
+  ret void
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
+
+define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a)
+  store <2 x double> %c, ptr %vd
+  ret void
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
+
+define void @lasx_extract_128_lo(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a)
+  store <2 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
+
+define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a)
+  store <4 x float> %c, ptr %vd
+  ret void
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
+
+define void @lasx_extract_128_hi_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a)
+  store <2 x double> %c, ptr %vd
+  ret void
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
+
+define void @lasx_extract_128_hi(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a)
+  store <2 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
+
+define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %b = load <4 x float>, ptr %vb
+  %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b)
+  store <8 x float> %c, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
+
+define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %b = load <2 x double>, ptr %vb
+  %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b)
+  store <4 x double> %c, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)
+
+define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %b = load <2 x i64>, ptr %vb
+  %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b)
+  store <4 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
+
+define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %b = load <4 x float>, ptr %vb
+  %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b)
+  store <8 x float> %c, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
+
+define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %b = load <2 x double>, ptr %vb
+  %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b)
+  store <4 x double> %c, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
+
+define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %b = load <2 x i64>, ptr %vb
+  %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b)
+  store <4 x i64> %c, ptr %vd
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
new file mode 100644
index 0000000..5c5c199
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -0,0 +1,321 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavg.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %add = add <32 x i8> %va, %vb
+  %shr = ashr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  store <32 x i8> %shr, ptr %res
+  ret void
+}
+
+define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavg.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %add = add <16 x i16> %va, %vb
+  %shr = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  store <16 x i16> %shr, ptr %res
+  ret void
+}
+
+define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavg.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %add = add <8 x i32> %va, %vb
+  %shr = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  store <8 x i32> %shr, ptr %res
+  ret void
+}
+
+define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavg_d:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvld $xr1, $a2, 0
+; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT:    xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: xvavg_d:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvld $xr1, $a2, 0
+; LA64-NEXT:    xvavg.d $xr0, $xr0, $xr1
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %add = add <4 x i64> %va, %vb
+  %shr = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+  store <4 x i64> %shr, ptr %res
+  ret void
+}
+
+define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavg.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %add = add <32 x i8> %va, %vb
+  %shr = lshr <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  store <32 x i8> %shr, ptr %res
+  ret void
+}
+
+define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavg.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %add = add <16 x i16> %va, %vb
+  %shr = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  store <16 x i16> %shr, ptr %res
+  ret void
+}
+
+define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavg.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %add = add <8 x i32> %va, %vb
+  %shr = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  store <8 x i32> %shr, ptr %res
+  ret void
+}
+
+define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavg_du:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvld $xr1, $a2, 0
+; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: xvavg_du:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvld $xr1, $a2, 0
+; LA64-NEXT:    xvavg.du $xr0, $xr0, $xr1
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %add = add <4 x i64> %va, %vb
+  %shr = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+  store <4 x i64> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavgr.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %add = add <32 x i8> %va, %vb
+  %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %shr = ashr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  store <32 x i8> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavgr.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %add = add <16 x i16> %va, %vb
+  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shr = ashr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  store <16 x i16> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavgr.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %add = add <8 x i32> %va, %vb
+  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %shr = ashr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  store <8 x i32> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavgr_d:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvld $xr1, $a2, 0
+; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT:    xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT:    xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: xvavgr_d:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvld $xr1, $a2, 0
+; LA64-NEXT:    xvavgr.d $xr0, $xr0, $xr1
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %add = add <4 x i64> %va, %vb
+  %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+  %shr = ashr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+  store <4 x i64> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavgr.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %add = add <32 x i8> %va, %vb
+  %add1 = add <32 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %shr = lshr <32 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  store <32 x i8> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavgr.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %add = add <16 x i16> %va, %vb
+  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  store <16 x i16> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvavgr.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %add = add <8 x i32> %va, %vb
+  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %shr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  store <8 x i32> %shr, ptr %res
+  ret void
+}
+
+define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xvavgr_du:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvld $xr1, $a2, 0
+; LA32-NEXT:    xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT:    xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT:    xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: xvavgr_du:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvld $xr1, $a2, 0
+; LA64-NEXT:    xvavgr.du $xr0, $xr0, $xr1
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %add = add <4 x i64> %va, %vb
+  %add1 = add <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+  %shr = lshr <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+  store <4 x i64> %shr, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
new file mode 100644
index 0000000..c82adcb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avgfloor-ceil.ll
@@ -0,0 +1,379 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %ea = sext <32 x i8> %va to <32 x i16>
+  %eb = sext <32 x i8> %vb to <32 x i16>
+  %add = add <32 x i16> %ea, %eb
+  %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %r = trunc <32 x i16> %shr to <32 x i8>
+  store <32 x i8> %r, ptr %res
+  ret void
+}
+
+define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %ea = sext <16 x i16> %va to <16 x i32>
+  %eb = sext <16 x i16> %vb to <16 x i32>
+  %add = add <16 x i32> %ea, %eb
+  %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %r = trunc <16 x i32> %shr to <16 x i16>
+  store <16 x i16> %r, ptr %res
+  ret void
+}
+
+define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %ea = sext <8 x i32> %va to <8 x i64>
+  %eb = sext <8 x i32> %vb to <8 x i64>
+  %add = add <8 x i64> %ea, %eb
+  %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %r = trunc <8 x i64> %shr to <8 x i32>
+  store <8 x i32> %r, ptr %res
+  ret void
+}
+
+define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %ea = sext <4 x i64> %va to <4 x i128>
+  %eb = sext <4 x i64> %vb to <4 x i128>
+  %add = add <4 x i128> %ea, %eb
+  %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+  %r = trunc <4 x i128> %shr to <4 x i64>
+  store <4 x i64> %r, ptr %res
+  ret void
+}
+
+define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.b $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %ea = zext <32 x i8> %va to <32 x i16>
+  %eb = zext <32 x i8> %vb to <32 x i16>
+  %add = add <32 x i16> %ea, %eb
+  %shr = lshr <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %r = trunc <32 x i16> %shr to <32 x i8>
+  store <32 x i8> %r, ptr %res
+  ret void
+}
+
+define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.h $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %ea = zext <16 x i16> %va to <16 x i32>
+  %eb = zext <16 x i16> %vb to <16 x i32>
+  %add = add <16 x i32> %ea, %eb
+  %shr = lshr <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %r = trunc <16 x i32> %shr to <16 x i16>
+  store <16 x i16> %r, ptr %res
+  ret void
+}
+
+define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.w $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %ea = zext <8 x i32> %va to <8 x i64>
+  %eb = zext <8 x i32> %vb to <8 x i64>
+  %add = add <8 x i64> %ea, %eb
+  %shr = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %r = trunc <8 x i64> %shr to <8 x i32>
+  store <8 x i32> %r, ptr %res
+  ret void
+}
+
+define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavg_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT:    xvadd.d $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %ea = zext <4 x i64> %va to <4 x i128>
+  %eb = zext <4 x i64> %vb to <4 x i128>
+  %add = add <4 x i128> %ea, %eb
+  %shr = lshr <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+  %r = trunc <4 x i128> %shr to <4 x i64>
+  store <4 x i64> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %ea = sext <32 x i8> %va to <32 x i16>
+  %eb = sext <32 x i8> %vb to <32 x i16>
+  %add = add <32 x i16> %ea, %eb
+  %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %r = trunc <32 x i16> %shr to <32 x i8>
+  store <32 x i8> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %ea = sext <16 x i16> %va to <16 x i32>
+  %eb = sext <16 x i16> %vb to <16 x i32>
+  %add = add <16 x i32> %ea, %eb
+  %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %r = trunc <16 x i32> %shr to <16 x i16>
+  store <16 x i16> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %ea = sext <8 x i32> %va to <8 x i64>
+  %eb = sext <8 x i32> %vb to <8 x i64>
+  %add = add <8 x i64> %ea, %eb
+  %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %r = trunc <8 x i64> %shr to <8 x i32>
+  store <8 x i32> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %ea = sext <4 x i64> %va to <4 x i128>
+  %eb = sext <4 x i64> %vb to <4 x i128>
+  %add = add <4 x i128> %ea, %eb
+  %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+  %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+  %r = trunc <4 x i128> %shr to <4 x i64>
+  store <4 x i64> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.b $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  %ea = zext <32 x i8> %va to <32 x i16>
+  %eb = zext <32 x i8> %vb to <32 x i16>
+  %add = add <32 x i16> %ea, %eb
+  %add1 = add <32 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shr = lshr <32 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %r = trunc <32 x i16> %shr to <32 x i8>
+  store <32 x i8> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.h $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  %ea = zext <16 x i16> %va to <16 x i32>
+  %eb = zext <16 x i16> %vb to <16 x i32>
+  %add = add <16 x i32> %ea, %eb
+  %add1 = add <16 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %shr = lshr <16 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %r = trunc <16 x i32> %shr to <16 x i16>
+  store <16 x i16> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.w $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  %ea = zext <8 x i32> %va to <8 x i64>
+  %eb = zext <8 x i32> %vb to <8 x i64>
+  %add = add <8 x i64> %ea, %eb
+  %add1 = add <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %shr = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %r = trunc <8 x i64> %shr to <8 x i32>
+  store <8 x i32> %r, ptr %res
+  ret void
+}
+
+define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xvavgr_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr2, $xr0, $xr1
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT:    xvsub.d $xr0, $xr2, $xr0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  %ea = zext <4 x i64> %va to <4 x i128>
+  %eb = zext <4 x i64> %vb to <4 x i128>
+  %add = add <4 x i128> %ea, %eb
+  %add1 = add <4 x i128> %add, <i128 1, i128 1, i128 1, i128 1>
+  %shr = lshr <4 x i128> %add1, <i128 1, i128 1, i128 1, i128 1>
+  %r = trunc <4 x i128> %shr to <4 x i64>
+  store <4 x i64> %r, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
index 68f2e3a..6b5f575 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
@@ -1,166 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
 
 declare <8 x float> @llvm.log2.v8f32(<8 x float>)
 declare <4 x double> @llvm.log2.v4f64(<4 x double>)
 
 define void @flog2_v8f32(ptr %res, ptr %a) nounwind {
-; LA32-LABEL: flog2_v8f32:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -128
-; LA32-NEXT:    st.w $ra, $sp, 124 # 4-byte Folded Spill
-; LA32-NEXT:    st.w $fp, $sp, 120 # 4-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; LA32-NEXT:    move $fp, $a0
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 5
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT:    vst $vr0, $sp, 48 # 16-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 4
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT:    vld $vr1, $sp, 48 # 16-byte Folded Reload
-; LA32-NEXT:    vextrins.w $vr0, $vr1, 16
-; LA32-NEXT:    xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 6
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT:    vextrins.w $vr1, $vr0, 32
-; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 7
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT:    xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT:    vextrins.w $vr1, $vr0, 48
-; LA32-NEXT:    xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 1
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT:    vst $vr0, $sp, 16 # 16-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 0
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0
-; LA32-NEXT:    vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA32-NEXT:    vextrins.w $vr0, $vr1, 16
-; LA32-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 2
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT:    vextrins.w $vr1, $vr0, 32
-; LA32-NEXT:    xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; LA32-NEXT:    xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; LA32-NEXT:    xvpickve.w $xr0, $xr0, 3
-; LA32-NEXT:    # kill: def $f0 killed $f0 killed $xr0
-; LA32-NEXT:    bl log2f
-; LA32-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; LA32-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; LA32-NEXT:    vextrins.w $vr1, $vr0, 48
-; LA32-NEXT:    xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; LA32-NEXT:    xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT:    xvst $xr1, $fp, 0
-; LA32-NEXT:
ld.w $fp, $sp, 120 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 128 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v8f32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -128 -; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: xvpickve.w $xr0, $xr0, 5 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 4 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 6 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 7 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 1 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 2 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 3 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT: 
vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: xvpermi.q $xr1, $xr0, 2 -; LA64-NEXT: xvst $xr1, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 128 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvflogb.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <8 x float>, ptr %a %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v) @@ -169,93 +20,12 @@ entry: } define void @flog2_v4f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -112 -; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: xvpickve.d $xr0, $xr0, 3 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 0 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA32-NEXT: xvpermi.q $xr0, $xr1, 2 -; LA32-NEXT: xvst $xr0, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 112 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v4f64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -112 -; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: xvpickve.d $xr0, $xr0, 3 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 2 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill 
-; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA64-NEXT: xvpermi.q $xr0, $xr1, 2 -; LA64-NEXT: xvst $xr0, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 112 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvflogb.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <4 x double>, ptr %a %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll new file mode 100644 index 0000000..6b8ab2c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { +; CHECK-LABEL: rotl_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: xvneg.b $xr1, $xr1 +; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %src + %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0 + %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer + %v1.sub = sub <32 x i8> splat (i8 8), %v1 + %b = shl <32 x i8> %v0, %v1 + %c = lshr <32 x i8> %v0, %v1.sub + %d = or <32 x i8> %b, %c + store <32 x i8> %d, ptr %dst + ret void +} + +define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { +; CHECK-LABEL: rotr_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %src + %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0 + %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer + %v1.sub = sub <32 x i8> splat (i8 8), %v1 + %b = lshr <32 x i8> %v0, %v1 + %c = shl <32 x i8> %v0, %v1.sub + %d = or <32 x i8> %b, %c + store <32 x i8> %d, ptr %dst + ret void +} + +define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v32i8_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrotri.b $xr0, $xr0, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %src + %b = lshr <32 x i8> %v0, splat (i8 2) + %c = shl <32 x i8> %v0, splat (i8 6) + %d = or <32 x i8> %b, %c + store <32 x i8> %d, ptr %dst + ret 
void +} + +define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { +; CHECK-LABEL: rotl_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: xvneg.h $xr1, $xr1 +; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %src + %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0 + %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer + %v1.sub = sub <16 x i16> splat (i16 16), %v1 + %b = shl <16 x i16> %v0, %v1 + %c = lshr <16 x i16> %v0, %v1.sub + %d = or <16 x i16> %b, %c + store <16 x i16> %d, ptr %dst + ret void +} + +define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { +; CHECK-LABEL: rotr_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %src + %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0 + %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer + %v1.sub = sub <16 x i16> splat (i16 16), %v1 + %b = lshr <16 x i16> %v0, %v1 + %c = shl <16 x i16> %v0, %v1.sub + %d = or <16 x i16> %b, %c + store <16 x i16> %d, ptr %dst + ret void +} + +define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v16i16_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrotri.h $xr0, $xr0, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %src + %b = lshr <16 x i16> %v0, splat (i16 2) + %c = shl <16 x i16> %v0, splat (i16 14) + %d = or <16 x i16> %b, %c + store <16 x i16> %d, ptr %dst + ret void +} + +define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { +; CHECK-LABEL: rotl_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvneg.w $xr1, $xr1 +; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %src + %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0 + %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer + %v1.sub = sub <8 x i32> splat (i32 32), %v1 + %b = shl <8 x i32> %v0, %v1 + %c = lshr <8 x i32> %v0, %v1.sub + %d = or <8 x i32> %b, %c + store <8 x i32> %d, ptr %dst + ret void +} + +define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { +; CHECK-LABEL: rotr_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %src + %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0 + %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer + %v1.sub = sub <8 x i32> splat (i32 32), %v1 + %b = lshr <8 x i32> %v0, %v1 + %c = shl <8 x i32> %v0, %v1.sub + %d = or <8 x i32> %b, %c + store <8 x i32> %d, ptr %dst + ret void +} + +define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v8i32_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrotri.w $xr0, $xr0, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %src + %b = lshr <8 x i32> %v0, splat (i32 2) + %c = shl <8 x i32> %v0, splat (i32 30) + %d = or <8 x i32> %b, %c + store <8 x i32> %d, ptr %dst + ret void +} + +define 
void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { +; LA32-LABEL: rotl_v4i64: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2 +; LA32-NEXT: xvpermi.q $xr1, $xr1, 2 +; LA32-NEXT: xvneg.d $xr1, $xr1 +; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_v4i64: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.d $xr1, $a2 +; LA64-NEXT: xvneg.d $xr1, $xr1 +; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %src + %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0 + %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer + %v1.sub = sub <4 x i64> splat (i64 64), %v1 + %b = shl <4 x i64> %v0, %v1 + %c = lshr <4 x i64> %v0, %v1.sub + %d = or <4 x i64> %b, %c + store <4 x i64> %d, ptr %dst + ret void +} + +define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { +; LA32-LABEL: rotr_v4i64: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2 +; LA32-NEXT: xvpermi.q $xr1, $xr1, 2 +; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_v4i64: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.d $xr1, $a2 +; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %src + %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0 + %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer + %v1.sub = sub <4 x i64> splat (i64 64), %v1 + %b = lshr <4 x i64> %v0, %v1 + %c = shl <4 x i64> %v0, %v1.sub + %d = or <4 x i64> %b, %c + store <4 x i64> %d, ptr %dst + ret void +} + +define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind { +; LA32-LABEL: rotr_v4i64_imm: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvrepli.w $xr1, -62 +; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_v4i64_imm: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvrotri.d $xr0, $xr0, 2 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %src + %b = lshr <4 x i64> %v0, splat (i64 2) + %c = shl <4 x i64> %v0, splat (i64 62) + %d = or <4 x i64> %b, %c + store <4 x i64> %d, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll new file mode 100644 index 0000000..1c9038a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +define void @shufflevector_bswap_h(ptr %res, ptr %a) nounwind { +; CHECK-LABEL: shufflevector_bswap_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 177 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %va = load <32 x i8>, ptr %a + %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 
23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30>
+ store <32 x i8> %b, ptr %res
+ ret void
+}
+
+define void @shufflevector_bswap_w(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 26, i32 25, i32 24, i32 31, i32 30, i32 29, i32 28>
+ store <32 x i8> %b, ptr %res
+ ret void
+}
+
+define void @shufflevector_bswap_d(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
+ store <32 x i8> %b, ptr %res
+ ret void
+}
