diff options
8 files changed, 66 insertions, 292 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 027e62176ec1..80bb63e7d6d5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -90,9 +90,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); static const MVT::SimpleValueType LSXVTs[] = { - MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; + MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, + MVT::v1i128, MVT::v4f32, MVT::v2f64}; static const MVT::SimpleValueType LASXVTs[] = { - MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; + MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, + MVT::v2i128, MVT::v8f32, MVT::v4f64}; if (Subtarget.hasExtLSX()) for (MVT VT : LSXVTs) @@ -397,6 +399,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, } setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, MVT::v1i128, Legal); } // Set operations for 'LASX' feature. @@ -469,6 +472,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, } setOperationAction(ISD::FP_ROUND, MVT::v4f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, MVT::v2i128, Legal); } // Set DAG combine for LA32 and LA64. diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 6b1f7fd59afb..f925c185a69e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1153,6 +1153,19 @@ multiclass PatXrXrF<SDPatternOperator OpNode, string Inst> { (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>; } +multiclass PatXrXrQ<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v2i128 LASX256:$xj), (v2i128 LASX256:$xk)), + (!cast<LAInst>(Inst#"_Q") LASX256:$xj, LASX256:$xk)>; +} + multiclass PatXrXrU<SDPatternOperator OpNode, string Inst> { def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), (!cast<LAInst>(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; @@ -1347,10 +1360,10 @@ multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> { let Predicates = [HasExtLASX] in { -// XVADD_{B/H/W/D} -defm : PatXrXr<add, "XVADD">; -// XVSUB_{B/H/W/D} -defm : PatXrXr<sub, "XVSUB">; +// XVADD_{B/H/W/D/Q} +defm : PatXrXrQ<add, "XVADD">; +// XVSUB_{B/H/W/D/Q} +defm : PatXrXrQ<sub, "XVSUB">; // XVADDI_{B/H/W/D}U defm : PatXrNimm5<sub, "XVADDI">; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 857a2bee7662..39c2b3f958c0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1356,6 +1356,19 @@ multiclass PatVrVrF<SDPatternOperator OpNode, string Inst> { (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>; } +multiclass PatVrVrQ<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v1i128 LSX128:$vj), (v1i128 LSX128:$vk)), + (!cast<LAInst>(Inst#"_Q") LSX128:$vj, LSX128:$vk)>; +} + multiclass PatVrVrU<SDPatternOperator OpNode, string Inst> { def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), (!cast<LAInst>(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; @@ -1576,10 +1589,10 @@ multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> { let Predicates = [HasExtLSX] in { -// VADD_{B/H/W/D} -defm : PatVrVr<add, "VADD">; -// VSUB_{B/H/W/D} -defm : PatVrVr<sub, "VSUB">; +// VADD_{B/H/W/D/Q} +defm : PatVrVrQ<add, "VADD">; +// VSUB_{B/H/W/D/Q} +defm : PatVrVrQ<sub, "VSUB">; // VADDI_{B/H/W/D}U defm : PatVrNimm5<sub, "VADDI">; diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td index 2a8cdf953e00..8008e3a218e3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td @@ -202,7 +202,7 @@ def VR#I : LoongArchReg128<!cast<LoongArchReg64>("F"#I#"_64"), "vr"#I>, DwarfRegAlias<!cast<LoongArchReg64>("F"#I#"_64")>; def LSX128 : RegisterClass<"LoongArch", - [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], + [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, v1i128], 128, (sequence "VR%u", 0, 31)>; // LASX registers @@ -212,7 +212,7 @@ def XR#I : LoongArchReg256<!cast<LoongArchReg128>("VR"#I), "xr"#I>, DwarfRegAlias<!cast<LoongArchReg128>("VR"#I)>; def LASX256 : RegisterClass<"LoongArch", - [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], + [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64, v2i128], 256, (sequence "XR%u", 0, 31)>; // Scratchpad registers diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll index 9ec873e9e043..ad63e17f5fbb 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: add_v32i8: @@ -123,97 +123,10 @@ entry: } define <2 x i128> @add_v2i128(<2 x i128> %a, <2 x i128> %b) { -; LA32-LABEL: add_v2i128: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: .cfi_def_cfa_offset 16 -; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: .cfi_offset 22, -4 -; LA32-NEXT: ld.w $a3, $a2, 28 -; LA32-NEXT: ld.w $a4, $a1, 28 -; LA32-NEXT: ld.w $a5, $a2, 24 -; LA32-NEXT: ld.w $a6, $a1, 24 -; LA32-NEXT: ld.w $a7, $a2, 16 -; LA32-NEXT: ld.w $t0, $a1, 16 -; LA32-NEXT: ld.w $t1, $a2, 20 -; LA32-NEXT: ld.w $t2, $a1, 20 -; LA32-NEXT: ld.w $t3, $a2, 12 -; LA32-NEXT: ld.w $t4, $a1, 12 -; LA32-NEXT: ld.w $t5, $a2, 0 -; LA32-NEXT: ld.w $t6, $a2, 4 -; LA32-NEXT: ld.w $t7, $a1, 4 -; LA32-NEXT: ld.w $t8, $a1, 0 -; LA32-NEXT: ld.w $a2, $a2, 8 -; LA32-NEXT: ld.w $a1, $a1, 8 -; LA32-NEXT: add.w $t6, $t7, $t6 -; LA32-NEXT: add.w $t5, $t8, $t5 -; LA32-NEXT: sltu $t8, $t5, $t8 -; LA32-NEXT: add.w $t6, $t6, $t8 -; LA32-NEXT: sltu $fp, $t6, $t7 -; LA32-NEXT: xor $t7, $t6, $t7 -; LA32-NEXT: sltui $t7, $t7, 1 -; LA32-NEXT: masknez $fp, $fp, $t7 -; LA32-NEXT: maskeqz $t7, $t8, $t7 -; LA32-NEXT: or $t7, $t7, $fp -; LA32-NEXT: add.w $a2, $a1, $a2 -; LA32-NEXT: add.w $t7, $a2, $t7 -; LA32-NEXT: sltu $t8, $t7, $a2 -; LA32-NEXT: add.w $t3, $t4, $t3 -; LA32-NEXT: sltu $a1, $a2, $a1 -; LA32-NEXT: add.w $a1, $t3, $a1 -; LA32-NEXT: add.w $a1, $a1, $t8 -; LA32-NEXT: add.w $a2, $t2, $t1 -; LA32-NEXT: add.w $a7, $t0, $a7 -; LA32-NEXT: sltu $t0, $a7, $t0 -; LA32-NEXT: add.w $a2, $a2, $t0 -; LA32-NEXT: sltu $t1, $a2, $t2 -; LA32-NEXT: xor $t2, $a2, $t2 -; LA32-NEXT: sltui $t2, $t2, 1 -; LA32-NEXT: masknez $t1, $t1, $t2 -; LA32-NEXT: maskeqz $t0, $t0, $t2 -; LA32-NEXT: or $t0, $t0, $t1 -; LA32-NEXT: add.w $a5, $a6, $a5 -; LA32-NEXT: add.w $t0, $a5, $t0 -; LA32-NEXT: sltu $t1, $t0, $a5 -; LA32-NEXT: add.w $a3, $a4, $a3 -; LA32-NEXT: sltu $a4, $a5, $a6 -; LA32-NEXT: add.w $a3, $a3, $a4 -; LA32-NEXT: add.w $a3, $a3, $t1 -; LA32-NEXT: st.w $a7, $a0, 16 -; LA32-NEXT: st.w $t5, $a0, 0 -; LA32-NEXT: st.w $a2, $a0, 20 -; LA32-NEXT: st.w $t6, $a0, 4 -; LA32-NEXT: st.w $t0, $a0, 24 -; LA32-NEXT: st.w $t7, $a0, 8 -; LA32-NEXT: st.w $a3, $a0, 28 -; LA32-NEXT: st.w $a1, $a0, 12 -; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret -; -; LA64-LABEL: add_v2i128: -; LA64: # %bb.0: # %entry -; LA64-NEXT: ld.d $a3, $a2, 16 -; LA64-NEXT: ld.d $a4, $a1, 16 -; LA64-NEXT: ld.d $a5, $a2, 0 -; LA64-NEXT: ld.d $a6, $a2, 8 -; LA64-NEXT: ld.d $a7, $a1, 8 -; LA64-NEXT: ld.d $t0, $a1, 0 -; LA64-NEXT: ld.d $a2, $a2, 24 -; LA64-NEXT: ld.d $a1, $a1, 24 -; LA64-NEXT: add.d $a6, $a7, $a6 -; LA64-NEXT: add.d $a5, $t0, $a5 -; LA64-NEXT: sltu $a7, $a5, $t0 -; LA64-NEXT: add.d $a6, $a6, $a7 -; LA64-NEXT: add.d $a1, $a1, $a2 -; LA64-NEXT: add.d $a2, $a4, $a3 -; LA64-NEXT: sltu $a3, $a2, $a4 -; LA64-NEXT: add.d $a1, $a1, $a3 -; LA64-NEXT: st.d $a2, $a0, 16 -; LA64-NEXT: st.d $a5, $a0, 0 -; LA64-NEXT: st.d $a1, $a0, 24 -; LA64-NEXT: st.d $a6, $a0, 8 -; LA64-NEXT: ret +; CHECK-LABEL: add_v2i128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1 +; CHECK-NEXT: ret entry: %0 = add <2 x i128> %a, %b ret <2 x i128> %0 diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll index fc706ce62a4c..5605ccaedceb 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: sub_v32i8: @@ -67,106 +67,10 @@ entry: } define <2 x i128> @sub_v2i128(<2 x i128> %a, <2 x i128> %b) { -; LA32-LABEL: sub_v2i128: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: .cfi_def_cfa_offset 16 -; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill -; LA32-NEXT: st.w $s1, $sp, 4 # 4-byte Folded Spill -; LA32-NEXT: st.w $s2, $sp, 0 # 4-byte Folded Spill -; LA32-NEXT: .cfi_offset 22, -4 -; LA32-NEXT: .cfi_offset 23, -8 -; LA32-NEXT: .cfi_offset 24, -12 -; LA32-NEXT: .cfi_offset 25, -16 -; LA32-NEXT: ld.w $a3, $a2, 28 -; LA32-NEXT: ld.w $a4, $a1, 28 -; LA32-NEXT: ld.w $a5, $a2, 24 -; LA32-NEXT: ld.w $a6, $a1, 24 -; LA32-NEXT: ld.w $a7, $a2, 16 -; LA32-NEXT: ld.w $t0, $a1, 16 -; LA32-NEXT: ld.w $t1, $a2, 20 -; LA32-NEXT: ld.w $t2, $a1, 20 -; LA32-NEXT: ld.w $t3, $a2, 12 -; LA32-NEXT: ld.w $t4, $a1, 12 -; LA32-NEXT: ld.w $t5, $a2, 8 -; LA32-NEXT: ld.w $t6, $a2, 4 -; LA32-NEXT: ld.w $t7, $a1, 4 -; LA32-NEXT: ld.w $t8, $a1, 8 -; LA32-NEXT: ld.w $a2, $a2, 0 -; LA32-NEXT: ld.w $a1, $a1, 0 -; LA32-NEXT: sltu $fp, $t7, $t6 -; LA32-NEXT: xor $s0, $t7, $t6 -; LA32-NEXT: sltui $s0, $s0, 1 -; LA32-NEXT: masknez $fp, $fp, $s0 -; LA32-NEXT: sltu $s1, $a1, $a2 -; LA32-NEXT: maskeqz $s0, $s1, $s0 -; LA32-NEXT: or $fp, $s0, $fp -; LA32-NEXT: sub.w $s0, $t8, $t5 -; LA32-NEXT: sltu $s2, $s0, $fp -; LA32-NEXT: sltu $t5, $t8, $t5 -; LA32-NEXT: sub.w $t3, $t4, $t3 -; LA32-NEXT: sub.w $t3, $t3, $t5 -; LA32-NEXT: sub.w $t3, $t3, $s2 -; LA32-NEXT: sltu $t4, $t2, $t1 -; LA32-NEXT: xor $t5, $t2, $t1 -; LA32-NEXT: sltui $t5, $t5, 1 -; LA32-NEXT: masknez $t4, $t4, $t5 -; LA32-NEXT: sltu $t8, $t0, $a7 -; LA32-NEXT: maskeqz $t5, $t8, $t5 -; LA32-NEXT: or $t4, $t5, $t4 -; LA32-NEXT: sub.w $t5, $a6, $a5 -; LA32-NEXT: sltu $s2, $t5, $t4 -; LA32-NEXT: sltu $a5, $a6, $a5 -; LA32-NEXT: sub.w $a3, $a4, $a3 -; LA32-NEXT: sub.w $a3, $a3, $a5 -; LA32-NEXT: sub.w $a3, $a3, $s2 -; LA32-NEXT: sub.w $a4, $s0, $fp -; LA32-NEXT: sub.w $a5, $t5, $t4 -; LA32-NEXT: sub.w $a6, $t7, $t6 -; LA32-NEXT: sub.w $a6, $a6, $s1 -; LA32-NEXT: sub.w $t1, $t2, $t1 -; LA32-NEXT: sub.w $t1, $t1, $t8 -; LA32-NEXT: sub.w $a1, $a1, $a2 -; LA32-NEXT: sub.w $a2, $t0, $a7 -; LA32-NEXT: st.w $a2, $a0, 16 -; LA32-NEXT: st.w $a1, $a0, 0 -; LA32-NEXT: st.w $t1, $a0, 20 -; LA32-NEXT: st.w $a6, $a0, 4 -; LA32-NEXT: st.w $a5, $a0, 24 -; LA32-NEXT: st.w $a4, $a0, 8 -; LA32-NEXT: st.w $a3, $a0, 28 -; LA32-NEXT: st.w $t3, $a0, 12 -; LA32-NEXT: ld.w $s2, $sp, 0 # 4-byte Folded Reload -; LA32-NEXT: ld.w $s1, $sp, 4 # 4-byte Folded Reload -; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload -; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret -; -; LA64-LABEL: sub_v2i128: -; LA64: # %bb.0: # %entry -; LA64-NEXT: ld.d $a3, $a2, 24 -; LA64-NEXT: ld.d $a4, $a1, 24 -; LA64-NEXT: ld.d $a5, $a2, 8 -; LA64-NEXT: ld.d $a6, $a2, 0 -; LA64-NEXT: ld.d $a7, $a1, 0 -; LA64-NEXT: ld.d $t0, $a1, 8 -; LA64-NEXT: ld.d $a2, $a2, 16 -; LA64-NEXT: ld.d $a1, $a1, 16 -; LA64-NEXT: sltu $t1, $a7, $a6 -; LA64-NEXT: sub.d $a5, $t0, $a5 -; LA64-NEXT: sub.d $a5, $a5, $t1 -; LA64-NEXT: sltu $t0, $a1, $a2 -; LA64-NEXT: sub.d $a3, $a4, $a3 -; LA64-NEXT: sub.d $a3, $a3, $t0 -; LA64-NEXT: sub.d $a4, $a7, $a6 -; LA64-NEXT: sub.d $a1, $a1, $a2 -; LA64-NEXT: st.d $a1, $a0, 16 -; LA64-NEXT: st.d $a4, $a0, 0 -; LA64-NEXT: st.d $a3, $a0, 24 -; LA64-NEXT: st.d $a5, $a0, 8 -; LA64-NEXT: ret +; CHECK-LABEL: sub_v2i128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1 +; CHECK-NEXT: ret entry: %0 = sub <2 x i128> %a, %b ret <2 x i128> %0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll index 4fe1d1ffa975..ab9b6bc757f1 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: add_v16i8: @@ -67,47 +67,10 @@ entry: } define <1 x i128> @add_v1i128(<1 x i128> %a, <1 x i128> %b) nounwind { -; LA32-LABEL: add_v1i128: -; LA32: # %bb.0: # %entry -; LA32-NEXT: ld.w $a3, $a2, 12 -; LA32-NEXT: ld.w $a4, $a1, 12 -; LA32-NEXT: ld.w $a5, $a2, 0 -; LA32-NEXT: ld.w $a6, $a2, 4 -; LA32-NEXT: ld.w $a7, $a1, 4 -; LA32-NEXT: ld.w $t0, $a1, 0 -; LA32-NEXT: ld.w $a2, $a2, 8 -; LA32-NEXT: ld.w $a1, $a1, 8 -; LA32-NEXT: add.w $a6, $a7, $a6 -; LA32-NEXT: add.w $a5, $t0, $a5 -; LA32-NEXT: sltu $t0, $a5, $t0 -; LA32-NEXT: add.w $a6, $a6, $t0 -; LA32-NEXT: sltu $t1, $a6, $a7 -; LA32-NEXT: xor $a7, $a6, $a7 -; LA32-NEXT: sltui $a7, $a7, 1 -; LA32-NEXT: masknez $t1, $t1, $a7 -; LA32-NEXT: maskeqz $a7, $t0, $a7 -; LA32-NEXT: or $a7, $a7, $t1 -; LA32-NEXT: add.w $a2, $a1, $a2 -; LA32-NEXT: add.w $a7, $a2, $a7 -; LA32-NEXT: sltu $t0, $a7, $a2 -; LA32-NEXT: add.w $a3, $a4, $a3 -; LA32-NEXT: sltu $a1, $a2, $a1 -; LA32-NEXT: add.w $a1, $a3, $a1 -; LA32-NEXT: add.w $a1, $a1, $t0 -; LA32-NEXT: st.w $a5, $a0, 0 -; LA32-NEXT: st.w $a6, $a0, 4 -; LA32-NEXT: st.w $a7, $a0, 8 -; LA32-NEXT: st.w $a1, $a0, 12 -; LA32-NEXT: ret -; -; LA64-LABEL: add_v1i128: -; LA64: # %bb.0: # %entry -; LA64-NEXT: add.d $a1, $a1, $a3 -; LA64-NEXT: add.d $a2, $a0, $a2 -; LA64-NEXT: sltu $a0, $a2, $a0 -; LA64-NEXT: add.d $a1, $a1, $a0 -; LA64-NEXT: move $a0, $a2 -; LA64-NEXT: ret +; CHECK-LABEL: add_v1i128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1 +; CHECK-NEXT: ret entry: %0 = add <1 x i128> %a, %b ret <1 x i128> %0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll index a66f7977c438..f44960c0522f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: sub_v16i8: @@ -67,46 +67,10 @@ entry: } define <1 x i128> @sub_v1i128(<1 x i128> %a, <1 x i128> %b) nounwind { -; LA32-LABEL: sub_v1i128: -; LA32: # %bb.0: # %entry -; LA32-NEXT: ld.w $a3, $a2, 12 -; LA32-NEXT: ld.w $a4, $a1, 12 -; LA32-NEXT: ld.w $a5, $a2, 8 -; LA32-NEXT: ld.w $a6, $a2, 4 -; LA32-NEXT: ld.w $a7, $a1, 4 -; LA32-NEXT: ld.w $t0, $a1, 8 -; LA32-NEXT: ld.w $a2, $a2, 0 -; LA32-NEXT: ld.w $a1, $a1, 0 -; LA32-NEXT: sltu $t1, $a7, $a6 -; LA32-NEXT: xor $t2, $a7, $a6 -; LA32-NEXT: sltui $t2, $t2, 1 -; LA32-NEXT: masknez $t1, $t1, $t2 -; LA32-NEXT: sltu $t3, $a1, $a2 -; LA32-NEXT: maskeqz $t2, $t3, $t2 -; LA32-NEXT: or $t1, $t2, $t1 -; LA32-NEXT: sub.w $t2, $t0, $a5 -; LA32-NEXT: sltu $t4, $t2, $t1 -; LA32-NEXT: sltu $a5, $t0, $a5 -; LA32-NEXT: sub.w $a3, $a4, $a3 -; LA32-NEXT: sub.w $a3, $a3, $a5 -; LA32-NEXT: sub.w $a3, $a3, $t4 -; LA32-NEXT: sub.w $a4, $t2, $t1 -; LA32-NEXT: sub.w $a5, $a7, $a6 -; LA32-NEXT: sub.w $a5, $a5, $t3 -; LA32-NEXT: sub.w $a1, $a1, $a2 -; LA32-NEXT: st.w $a1, $a0, 0 -; LA32-NEXT: st.w $a5, $a0, 4 -; LA32-NEXT: st.w $a4, $a0, 8 -; LA32-NEXT: st.w $a3, $a0, 12 -; LA32-NEXT: ret -; -; LA64-LABEL: sub_v1i128: -; LA64: # %bb.0: # %entry -; LA64-NEXT: sltu $a4, $a0, $a2 -; LA64-NEXT: sub.d $a1, $a1, $a3 -; LA64-NEXT: sub.d $a1, $a1, $a4 -; LA64-NEXT: sub.d $a0, $a0, $a2 -; LA64-NEXT: ret +; CHECK-LABEL: sub_v1i128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1 +; CHECK-NEXT: ret entry: %0 = sub <1 x i128> %a, %b ret <1 x i128> %0 |
