author    | WANG Rui <wangrui@loongson.cn> | 2025-09-10 16:12:22 +0800
committer | WANG Rui <wangrui@loongson.cn> | 2025-09-18 12:18:35 +0800
commit    | aceb3189bf4384a6c34f3f89eac0a09dbe582e2f (patch)
tree      | c81553faaeadfb8e56dfd5a02eb9181a47212cc1
parent    | 39c8df3d0160ef26f239b1ebb49da63e36963308 (diff)
[llvm][LoongArch] Introduce LASX and LSX conversion intrinsics (branch: users/hev/llvm-lasx-lsx-conversion)
This patch introduces the LASX and LSX conversion intrinsics:
- <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
- <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
- <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
- <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
- <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)
- <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
- <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
- <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
- <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
- <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
- <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
- <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
- <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)
- <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
- <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
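
As a usage sketch (illustrative only, not part of the patch; the function name
@swap_halves is invented), the extract and concat intrinsics compose in the
obvious way, for example to swap the two 128-bit halves of a 256-bit vector:

  declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
  declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
  declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)

  ; concat.128 places its first operand in the low 128 bits of the result
  ; (it is lowered to ISD::CONCAT_VECTORS below), so passing %hi first
  ; swaps the halves.
  define <4 x i64> @swap_halves(<4 x i64> %v) {
    %lo = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %v)
    %hi = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %v)
    %res = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %hi, <2 x i64> %lo)
    ret <4 x i64> %res
  }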
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsLoongArch.td              |  38
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp      |   5
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td      |  31
-rw-r--r-- | llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll | 234
4 files changed, 308 insertions, 0 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 84026aa..1c46965 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -1192,4 +1192,42 @@ def int_loongarch_lasx_xvstelm_w
 def int_loongarch_lasx_xvstelm_d
   : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
+// LASX and LSX conversion
+def int_loongarch_lasx_cast_128_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128
+    : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128
+    : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_s
+    : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_d
+    : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo
+    : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_s
+    : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_d
+    : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi
+    : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo
+    : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi
+    : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 } // TargetPrefix = "loongarch"
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d..1ae5fc8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6168,6 +6168,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                        N->getOperand(1),
                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
                                    N->getOperand(2)));
+  case Intrinsic::loongarch_lasx_concat_128_s:
+  case Intrinsic::loongarch_lasx_concat_128_d:
+  case Intrinsic::loongarch_lasx_concat_128:
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index a79c01c..5fed702 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2028,6 +2028,37 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
 
+// LASX and LSX conversion
+def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
new file mode 100644
index 0000000..7b418cf
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
+
+define <8 x float> @lasx_cast_128_s(<4 x float> %va) {
+; CHECK-LABEL: lasx_cast_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
+
+define <4 x double> @lasx_cast_128_d(<2 x double> %va) {
+; CHECK-LABEL: lasx_cast_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %va)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
+
+define <4 x i64> @lasx_cast_128(<2 x i64> %va) {
+; CHECK-LABEL: lasx_cast_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
+
+define <8 x float> @lasx_concat_128_s(<4 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_concat_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %va, <4 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
+
+define <4 x double> @lasx_concat_128_d(<2 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_concat_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)
+
+define <4 x i64> @lasx_concat_128(<2 x i64> %va, <2 x i64> %vb) {
+; CHECK-LABEL: lasx_concat_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %va, <2 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
+
+define <4 x float> @lasx_extract_128_lo_s(<8 x float> %va) {
+; CHECK-LABEL: lasx_extract_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
+
+define <2 x double> @lasx_extract_128_lo_d(<4 x double> %va) {
+; CHECK-LABEL: lasx_extract_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
+
+define <2 x i64> @lasx_extract_128_lo(<4 x i64> %va) {
+; CHECK-LABEL: lasx_extract_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
+
+define <4 x float> @lasx_extract_128_hi_s(<8 x float> %va) {
+; CHECK-LABEL: lasx_extract_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
+
+define <2 x double> @lasx_extract_128_hi_d(<4 x double> %va) {
+; CHECK-LABEL: lasx_extract_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
+
+define <2 x i64> @lasx_extract_128_hi(<4 x i64> %va) {
+; CHECK-LABEL: lasx_extract_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %va)
+  ret <2 x i64> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
+
+define <8 x float> @lasx_insert_128_lo_s(<8 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %va, <4 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
+
+define <4 x double> @lasx_insert_128_lo_d(<4 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %va, <2 x double> %vb)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)
+
+define <4 x i64> @lasx_insert_128_lo(<4 x i64> %va, <2 x i64> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %va, <2 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
+
+define <8 x float> @lasx_insert_128_hi_s(<8 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %va, <4 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
+
+define <4 x double> @lasx_insert_128_hi_d(<4 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %va, <2 x double> %vb)
+  ret <4 x double> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
+
+define <4 x i64> @lasx_insert_128_hi(<4 x i64> %va, <2 x i64> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %va, <2 x i64> %vb)
+  ret <4 x i64> %res
+}
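
A note on the XVPERMI_Q immediates used in the patterns above (an assumption
based on the ISA's xvpermi.q description, not something stated in the patch):
the instruction rebuilds $xd from 128-bit lanes, with imm[1:0] selecting the
low lane and imm[5:4] the high lane, each from {0: xj.lo, 1: xj.hi, 2: xd.lo,
3: xd.hi}. Decoding the three values that appear here:

  imm = 1  (0b000001): lo = xj.hi              -> extract.128.hi
  imm = 2  (0b000010): lo = xd.lo, hi = xj.lo  -> concat.128 / insert.128.hi
  imm = 48 (0b110000): lo = xj.lo, hi = xd.hi  -> insert.128.lo

This matches the xvpermi.q immediates checked in the test file.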