aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp8
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td21
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td21
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td4
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll99
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll108
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll49
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll48
8 files changed, 66 insertions, 292 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 027e62176ec1..80bb63e7d6d5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -90,9 +90,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
static const MVT::SimpleValueType LSXVTs[] = {
- MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
+ MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v1i128, MVT::v4f32, MVT::v2f64};
static const MVT::SimpleValueType LASXVTs[] = {
- MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
+ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
+ MVT::v2i128, MVT::v8f32, MVT::v4f64};
if (Subtarget.hasExtLSX())
for (MVT VT : LSXVTs)
@@ -397,6 +399,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction({ISD::ADD, ISD::SUB}, MVT::v1i128, Legal);
}
// Set operations for 'LASX' feature.
@@ -469,6 +472,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
+ setOperationAction({ISD::ADD, ISD::SUB}, MVT::v2i128, Legal);
}
// Set DAG combine for LA32 and LA64.
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 6b1f7fd59afb..f925c185a69e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1153,6 +1153,19 @@ multiclass PatXrXrF<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
}
+multiclass PatXrXrQ<SDPatternOperator OpNode, string Inst> { // One pattern per integer vector type: OpNode on two LASX256 operands selects Inst with the _B/_H/_W/_D/_Q suffix.
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v2i128 LASX256:$xj), (v2i128 LASX256:$xk)), // <2 x i128> operands; the lasx add/sub tests in this patch expect xvadd.q / xvsub.q.
+ (!cast<LAInst>(Inst#"_Q") LASX256:$xj, LASX256:$xk)>;
+}
+
multiclass PatXrXrU<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
(!cast<LAInst>(Inst#"_BU") LASX256:$xj, LASX256:$xk)>;
@@ -1347,10 +1360,10 @@ multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
let Predicates = [HasExtLASX] in {
-// XVADD_{B/H/W/D}
-defm : PatXrXr<add, "XVADD">;
-// XVSUB_{B/H/W/D}
-defm : PatXrXr<sub, "XVSUB">;
+// XVADD_{B/H/W/D/Q}
+defm : PatXrXrQ<add, "XVADD">;
+// XVSUB_{B/H/W/D/Q}
+defm : PatXrXrQ<sub, "XVSUB">;
// XVADDI_{B/H/W/D}U
defm : PatXrNimm5<sub, "XVADDI">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 857a2bee7662..39c2b3f958c0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1356,6 +1356,19 @@ multiclass PatVrVrF<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
}
+multiclass PatVrVrQ<SDPatternOperator OpNode, string Inst> { // One pattern per integer vector type: OpNode on two LSX128 operands selects Inst with the _B/_H/_W/_D/_Q suffix.
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v1i128 LSX128:$vj), (v1i128 LSX128:$vk)), // <1 x i128> operands; the lsx add/sub tests in this patch expect vadd.q / vsub.q.
+ (!cast<LAInst>(Inst#"_Q") LSX128:$vj, LSX128:$vk)>;
+}
+
multiclass PatVrVrU<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
(!cast<LAInst>(Inst#"_BU") LSX128:$vj, LSX128:$vk)>;
@@ -1576,10 +1589,10 @@ multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
let Predicates = [HasExtLSX] in {
-// VADD_{B/H/W/D}
-defm : PatVrVr<add, "VADD">;
-// VSUB_{B/H/W/D}
-defm : PatVrVr<sub, "VSUB">;
+// VADD_{B/H/W/D/Q}
+defm : PatVrVrQ<add, "VADD">;
+// VSUB_{B/H/W/D/Q}
+defm : PatVrVrQ<sub, "VSUB">;
// VADDI_{B/H/W/D}U
defm : PatVrNimm5<sub, "VADDI">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
index 2a8cdf953e00..8008e3a218e3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
@@ -202,7 +202,7 @@ def VR#I : LoongArchReg128<!cast<LoongArchReg64>("F"#I#"_64"), "vr"#I>,
DwarfRegAlias<!cast<LoongArchReg64>("F"#I#"_64")>;
def LSX128 : RegisterClass<"LoongArch",
- [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
+ [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, v1i128], // v1i128 appended to this 128-bit class's value types.
128, (sequence "VR%u", 0, 31)>;
// LASX registers
@@ -212,7 +212,7 @@ def XR#I : LoongArchReg256<!cast<LoongArchReg128>("VR"#I), "xr"#I>,
DwarfRegAlias<!cast<LoongArchReg128>("VR"#I)>;
def LASX256 : RegisterClass<"LoongArch",
- [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+ [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64, v2i128], // v2i128 appended to this 256-bit class's value types.
256, (sequence "XR%u", 0, 31)>;
// Scratchpad registers
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
index 9ec873e9e043..ad63e17f5fbb 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: add_v32i8:
@@ -123,97 +123,10 @@ entry:
}
define <2 x i128> @add_v2i128(<2 x i128> %a, <2 x i128> %b) {
-; LA32-LABEL: add_v2i128:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 22, -4
-; LA32-NEXT: ld.w $a3, $a2, 28
-; LA32-NEXT: ld.w $a4, $a1, 28
-; LA32-NEXT: ld.w $a5, $a2, 24
-; LA32-NEXT: ld.w $a6, $a1, 24
-; LA32-NEXT: ld.w $a7, $a2, 16
-; LA32-NEXT: ld.w $t0, $a1, 16
-; LA32-NEXT: ld.w $t1, $a2, 20
-; LA32-NEXT: ld.w $t2, $a1, 20
-; LA32-NEXT: ld.w $t3, $a2, 12
-; LA32-NEXT: ld.w $t4, $a1, 12
-; LA32-NEXT: ld.w $t5, $a2, 0
-; LA32-NEXT: ld.w $t6, $a2, 4
-; LA32-NEXT: ld.w $t7, $a1, 4
-; LA32-NEXT: ld.w $t8, $a1, 0
-; LA32-NEXT: ld.w $a2, $a2, 8
-; LA32-NEXT: ld.w $a1, $a1, 8
-; LA32-NEXT: add.w $t6, $t7, $t6
-; LA32-NEXT: add.w $t5, $t8, $t5
-; LA32-NEXT: sltu $t8, $t5, $t8
-; LA32-NEXT: add.w $t6, $t6, $t8
-; LA32-NEXT: sltu $fp, $t6, $t7
-; LA32-NEXT: xor $t7, $t6, $t7
-; LA32-NEXT: sltui $t7, $t7, 1
-; LA32-NEXT: masknez $fp, $fp, $t7
-; LA32-NEXT: maskeqz $t7, $t8, $t7
-; LA32-NEXT: or $t7, $t7, $fp
-; LA32-NEXT: add.w $a2, $a1, $a2
-; LA32-NEXT: add.w $t7, $a2, $t7
-; LA32-NEXT: sltu $t8, $t7, $a2
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $a1, $a2, $a1
-; LA32-NEXT: add.w $a1, $t3, $a1
-; LA32-NEXT: add.w $a1, $a1, $t8
-; LA32-NEXT: add.w $a2, $t2, $t1
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: sltu $t0, $a7, $t0
-; LA32-NEXT: add.w $a2, $a2, $t0
-; LA32-NEXT: sltu $t1, $a2, $t2
-; LA32-NEXT: xor $t2, $a2, $t2
-; LA32-NEXT: sltui $t2, $t2, 1
-; LA32-NEXT: masknez $t1, $t1, $t2
-; LA32-NEXT: maskeqz $t0, $t0, $t2
-; LA32-NEXT: or $t0, $t0, $t1
-; LA32-NEXT: add.w $a5, $a6, $a5
-; LA32-NEXT: add.w $t0, $a5, $t0
-; LA32-NEXT: sltu $t1, $t0, $a5
-; LA32-NEXT: add.w $a3, $a4, $a3
-; LA32-NEXT: sltu $a4, $a5, $a6
-; LA32-NEXT: add.w $a3, $a3, $a4
-; LA32-NEXT: add.w $a3, $a3, $t1
-; LA32-NEXT: st.w $a7, $a0, 16
-; LA32-NEXT: st.w $t5, $a0, 0
-; LA32-NEXT: st.w $a2, $a0, 20
-; LA32-NEXT: st.w $t6, $a0, 4
-; LA32-NEXT: st.w $t0, $a0, 24
-; LA32-NEXT: st.w $t7, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 28
-; LA32-NEXT: st.w $a1, $a0, 12
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: add_v2i128:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a3, $a2, 16
-; LA64-NEXT: ld.d $a4, $a1, 16
-; LA64-NEXT: ld.d $a5, $a2, 0
-; LA64-NEXT: ld.d $a6, $a2, 8
-; LA64-NEXT: ld.d $a7, $a1, 8
-; LA64-NEXT: ld.d $t0, $a1, 0
-; LA64-NEXT: ld.d $a2, $a2, 24
-; LA64-NEXT: ld.d $a1, $a1, 24
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: add.d $a5, $t0, $a5
-; LA64-NEXT: sltu $a7, $a5, $t0
-; LA64-NEXT: add.d $a6, $a6, $a7
-; LA64-NEXT: add.d $a1, $a1, $a2
-; LA64-NEXT: add.d $a2, $a4, $a3
-; LA64-NEXT: sltu $a3, $a2, $a4
-; LA64-NEXT: add.d $a1, $a1, $a3
-; LA64-NEXT: st.d $a2, $a0, 16
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: add_v2i128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
entry:
%0 = add <2 x i128> %a, %b
ret <2 x i128> %0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
index fc706ce62a4c..5605ccaedceb 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: sub_v32i8:
@@ -67,106 +67,10 @@ entry:
}
define <2 x i128> @sub_v2i128(<2 x i128> %a, <2 x i128> %b) {
-; LA32-LABEL: sub_v2i128:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s1, $sp, 4 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s2, $sp, 0 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 22, -4
-; LA32-NEXT: .cfi_offset 23, -8
-; LA32-NEXT: .cfi_offset 24, -12
-; LA32-NEXT: .cfi_offset 25, -16
-; LA32-NEXT: ld.w $a3, $a2, 28
-; LA32-NEXT: ld.w $a4, $a1, 28
-; LA32-NEXT: ld.w $a5, $a2, 24
-; LA32-NEXT: ld.w $a6, $a1, 24
-; LA32-NEXT: ld.w $a7, $a2, 16
-; LA32-NEXT: ld.w $t0, $a1, 16
-; LA32-NEXT: ld.w $t1, $a2, 20
-; LA32-NEXT: ld.w $t2, $a1, 20
-; LA32-NEXT: ld.w $t3, $a2, 12
-; LA32-NEXT: ld.w $t4, $a1, 12
-; LA32-NEXT: ld.w $t5, $a2, 8
-; LA32-NEXT: ld.w $t6, $a2, 4
-; LA32-NEXT: ld.w $t7, $a1, 4
-; LA32-NEXT: ld.w $t8, $a1, 8
-; LA32-NEXT: ld.w $a2, $a2, 0
-; LA32-NEXT: ld.w $a1, $a1, 0
-; LA32-NEXT: sltu $fp, $t7, $t6
-; LA32-NEXT: xor $s0, $t7, $t6
-; LA32-NEXT: sltui $s0, $s0, 1
-; LA32-NEXT: masknez $fp, $fp, $s0
-; LA32-NEXT: sltu $s1, $a1, $a2
-; LA32-NEXT: maskeqz $s0, $s1, $s0
-; LA32-NEXT: or $fp, $s0, $fp
-; LA32-NEXT: sub.w $s0, $t8, $t5
-; LA32-NEXT: sltu $s2, $s0, $fp
-; LA32-NEXT: sltu $t5, $t8, $t5
-; LA32-NEXT: sub.w $t3, $t4, $t3
-; LA32-NEXT: sub.w $t3, $t3, $t5
-; LA32-NEXT: sub.w $t3, $t3, $s2
-; LA32-NEXT: sltu $t4, $t2, $t1
-; LA32-NEXT: xor $t5, $t2, $t1
-; LA32-NEXT: sltui $t5, $t5, 1
-; LA32-NEXT: masknez $t4, $t4, $t5
-; LA32-NEXT: sltu $t8, $t0, $a7
-; LA32-NEXT: maskeqz $t5, $t8, $t5
-; LA32-NEXT: or $t4, $t5, $t4
-; LA32-NEXT: sub.w $t5, $a6, $a5
-; LA32-NEXT: sltu $s2, $t5, $t4
-; LA32-NEXT: sltu $a5, $a6, $a5
-; LA32-NEXT: sub.w $a3, $a4, $a3
-; LA32-NEXT: sub.w $a3, $a3, $a5
-; LA32-NEXT: sub.w $a3, $a3, $s2
-; LA32-NEXT: sub.w $a4, $s0, $fp
-; LA32-NEXT: sub.w $a5, $t5, $t4
-; LA32-NEXT: sub.w $a6, $t7, $t6
-; LA32-NEXT: sub.w $a6, $a6, $s1
-; LA32-NEXT: sub.w $t1, $t2, $t1
-; LA32-NEXT: sub.w $t1, $t1, $t8
-; LA32-NEXT: sub.w $a1, $a1, $a2
-; LA32-NEXT: sub.w $a2, $t0, $a7
-; LA32-NEXT: st.w $a2, $a0, 16
-; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: st.w $t1, $a0, 20
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $a5, $a0, 24
-; LA32-NEXT: st.w $a4, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 28
-; LA32-NEXT: st.w $t3, $a0, 12
-; LA32-NEXT: ld.w $s2, $sp, 0 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s1, $sp, 4 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: sub_v2i128:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a3, $a2, 24
-; LA64-NEXT: ld.d $a4, $a1, 24
-; LA64-NEXT: ld.d $a5, $a2, 8
-; LA64-NEXT: ld.d $a6, $a2, 0
-; LA64-NEXT: ld.d $a7, $a1, 0
-; LA64-NEXT: ld.d $t0, $a1, 8
-; LA64-NEXT: ld.d $a2, $a2, 16
-; LA64-NEXT: ld.d $a1, $a1, 16
-; LA64-NEXT: sltu $t1, $a7, $a6
-; LA64-NEXT: sub.d $a5, $t0, $a5
-; LA64-NEXT: sub.d $a5, $a5, $t1
-; LA64-NEXT: sltu $t0, $a1, $a2
-; LA64-NEXT: sub.d $a3, $a4, $a3
-; LA64-NEXT: sub.d $a3, $a3, $t0
-; LA64-NEXT: sub.d $a4, $a7, $a6
-; LA64-NEXT: sub.d $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 16
-; LA64-NEXT: st.d $a4, $a0, 0
-; LA64-NEXT: st.d $a3, $a0, 24
-; LA64-NEXT: st.d $a5, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: sub_v2i128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
entry:
%0 = sub <2 x i128> %a, %b
ret <2 x i128> %0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
index 4fe1d1ffa975..ab9b6bc757f1 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: add_v16i8:
@@ -67,47 +67,10 @@ entry:
}
define <1 x i128> @add_v1i128(<1 x i128> %a, <1 x i128> %b) nounwind {
-; LA32-LABEL: add_v1i128:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ld.w $a3, $a2, 12
-; LA32-NEXT: ld.w $a4, $a1, 12
-; LA32-NEXT: ld.w $a5, $a2, 0
-; LA32-NEXT: ld.w $a6, $a2, 4
-; LA32-NEXT: ld.w $a7, $a1, 4
-; LA32-NEXT: ld.w $t0, $a1, 0
-; LA32-NEXT: ld.w $a2, $a2, 8
-; LA32-NEXT: ld.w $a1, $a1, 8
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: add.w $a5, $t0, $a5
-; LA32-NEXT: sltu $t0, $a5, $t0
-; LA32-NEXT: add.w $a6, $a6, $t0
-; LA32-NEXT: sltu $t1, $a6, $a7
-; LA32-NEXT: xor $a7, $a6, $a7
-; LA32-NEXT: sltui $a7, $a7, 1
-; LA32-NEXT: masknez $t1, $t1, $a7
-; LA32-NEXT: maskeqz $a7, $t0, $a7
-; LA32-NEXT: or $a7, $a7, $t1
-; LA32-NEXT: add.w $a2, $a1, $a2
-; LA32-NEXT: add.w $a7, $a2, $a7
-; LA32-NEXT: sltu $t0, $a7, $a2
-; LA32-NEXT: add.w $a3, $a4, $a3
-; LA32-NEXT: sltu $a1, $a2, $a1
-; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: add.w $a1, $a1, $t0
-; LA32-NEXT: st.w $a5, $a0, 0
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $a7, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 12
-; LA32-NEXT: ret
-;
-; LA64-LABEL: add_v1i128:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: add.d $a1, $a1, $a3
-; LA64-NEXT: add.d $a2, $a0, $a2
-; LA64-NEXT: sltu $a0, $a2, $a0
-; LA64-NEXT: add.d $a1, $a1, $a0
-; LA64-NEXT: move $a0, $a2
-; LA64-NEXT: ret
+; CHECK-LABEL: add_v1i128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
entry:
%0 = add <1 x i128> %a, %b
ret <1 x i128> %0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
index a66f7977c438..f44960c0522f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: sub_v16i8:
@@ -67,46 +67,10 @@ entry:
}
define <1 x i128> @sub_v1i128(<1 x i128> %a, <1 x i128> %b) nounwind {
-; LA32-LABEL: sub_v1i128:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ld.w $a3, $a2, 12
-; LA32-NEXT: ld.w $a4, $a1, 12
-; LA32-NEXT: ld.w $a5, $a2, 8
-; LA32-NEXT: ld.w $a6, $a2, 4
-; LA32-NEXT: ld.w $a7, $a1, 4
-; LA32-NEXT: ld.w $t0, $a1, 8
-; LA32-NEXT: ld.w $a2, $a2, 0
-; LA32-NEXT: ld.w $a1, $a1, 0
-; LA32-NEXT: sltu $t1, $a7, $a6
-; LA32-NEXT: xor $t2, $a7, $a6
-; LA32-NEXT: sltui $t2, $t2, 1
-; LA32-NEXT: masknez $t1, $t1, $t2
-; LA32-NEXT: sltu $t3, $a1, $a2
-; LA32-NEXT: maskeqz $t2, $t3, $t2
-; LA32-NEXT: or $t1, $t2, $t1
-; LA32-NEXT: sub.w $t2, $t0, $a5
-; LA32-NEXT: sltu $t4, $t2, $t1
-; LA32-NEXT: sltu $a5, $t0, $a5
-; LA32-NEXT: sub.w $a3, $a4, $a3
-; LA32-NEXT: sub.w $a3, $a3, $a5
-; LA32-NEXT: sub.w $a3, $a3, $t4
-; LA32-NEXT: sub.w $a4, $t2, $t1
-; LA32-NEXT: sub.w $a5, $a7, $a6
-; LA32-NEXT: sub.w $a5, $a5, $t3
-; LA32-NEXT: sub.w $a1, $a1, $a2
-; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: st.w $a5, $a0, 4
-; LA32-NEXT: st.w $a4, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 12
-; LA32-NEXT: ret
-;
-; LA64-LABEL: sub_v1i128:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: sltu $a4, $a0, $a2
-; LA64-NEXT: sub.d $a1, $a1, $a3
-; LA64-NEXT: sub.d $a1, $a1, $a4
-; LA64-NEXT: sub.d $a0, $a0, $a2
-; LA64-NEXT: ret
+; CHECK-LABEL: sub_v1i128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
entry:
%0 = sub <1 x i128> %a, %b
ret <1 x i128> %0