author    WANG Rui <wangrui@loongson.cn>  2025-09-26 09:16:51 +0800
committer WANG Rui <wangrui@loongson.cn>  2025-09-26 09:16:51 +0800
commit    ee29dd4f25dda7fcdff963053775279dd61d75ec (patch)
tree      60ad5970d8ebf3004be1bf00dc2d4560387df928
parent    aec52219a8b7c60e8d2dff2440b5c4c44596b377 (diff)
[LoongArch] Introduce instruction patterns for vector sign/zero extensions
This patch adds legalization rules and instruction selection patterns for vector sign- and zero-extension operations. With LASX, whole-vector and in-register extensions select to the vext2xv family of instructions; on LSX-only subtargets, in-register extensions select to chains of vsllwil instructions.
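
For example, sign-extending <4 x i32> to <4 x i64> previously lowered to a long
sequence of per-element vpickve2gr/vinsgr2vr moves. With these patterns it
selects to a single vext2xv.d.w, as in the updated load_sext_4i32_to_4i64 test
below:

    define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
    entry:
      %A = load <4 x i32>, ptr %ptr
      %B = sext <4 x i32> %A to <4 x i64>
      store <4 x i64> %B, ptr %dst
      ret void
    }

now compiles to:

    vld          $vr0, $a0, 0
    vext2xv.d.w  $xr0, $xr0
    xvst         $xr0, $a1, 0
    ret

On LSX-only subtargets (no LASX), an in-register extension such as a v2i64
sext_invec from v16i8 instead becomes a vsllwil.h.b / vsllwil.w.h /
vsllwil.d.w chain with a zero shift amount.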
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArch.td                  |    1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp     |   13
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td     |   60
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td      |   26
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll            |  899
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll            | 1047
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll             |  486
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll |   36
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll             |  399
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll             |   57
10 files changed, 785 insertions(+), 2239 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 6497ff9..62e837a 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -59,6 +59,7 @@ def FeatureExtLSX
: SubtargetFeature<"lsx", "HasExtLSX", "true",
"'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>;
def HasExtLSX : Predicate<"Subtarget->hasExtLSX()">;
+def IsExtLSX : Predicate<"Subtarget->hasExtLSX() && !Subtarget->hasExtLASX()">;
// Loongson Advanced SIMD eXtension (LASX)
def FeatureExtLASX
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 801e557..104b315 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -385,6 +385,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
+ for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+ }
}
// Set operations for 'LASX' feature.
@@ -446,6 +450,15 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
}
+ for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
+ setOperationAction(ISD::SIGN_EXTEND, VT, Legal);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Legal);
+ }
+ for (MVT VT :
+ {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+ }
}
// Set DAG combine for LA32 and LA64.
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990..b338946 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2063,6 +2063,66 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
+// Sign extensions
+def : Pat<(v4i64 (sext v4i32:$vj)),
+ (v4i64 (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (sext v8i16:$vj)),
+ (v8i32 (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (sext v16i8:$vj)),
+ (v16i16 (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i64 (sext_invec v32i8:$xj)), (v4i64 (VEXT2XV_D_B v32i8:$xj))>;
+def : Pat<(v4i64 (sext_invec v16i16:$xj)), (v4i64 (VEXT2XV_D_H v16i16:$xj))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)),
+ (v8i16 (EXTRACT_SUBREG (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v8i32 (sext_invec v32i8:$xj)), (v8i32 (VEXT2XV_W_B v32i8:$xj))>;
+
+// Zero extensions
+def : Pat<(v4i64 (zext v4i32:$vj)),
+ (v4i64 (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (zext v8i16:$vj)),
+ (v8i32 (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (zext v16i8:$vj)),
+ (v16i16 (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i64 (zext_invec v32i8:$xj)), (v4i64 (VEXT2XV_DU_BU v32i8:$xj))>;
+def : Pat<(v4i64 (zext_invec v16i16:$xj)), (v4i64 (VEXT2XV_DU_HU v16i16:$xj))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)),
+ (v8i16 (EXTRACT_SUBREG (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v8i32 (zext_invec v32i8:$xj)), (v8i32 (VEXT2XV_WU_BU v32i8:$xj))>;
+
} // Predicates = [HasExtLASX]
/// Intrinsic pattern
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index b0eb51a..eb1fe93 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2174,6 +2174,32 @@ def : Pat<(loongarch_vmsknez (v16i8 LSX128:$vj)), (PseudoVMSKNEZ_B LSX128:$vj)>;
} // Predicates = [HasExtLSX]
+let Predicates = [IsExtLSX] in {
+
+// Sign extensions
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+ (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+ (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)), (v2i64 (VSLLWIL_D_W v4i32:$vj, 0))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+ (v4i32 (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)), (v4i32 (VSLLWIL_W_H v8i16:$vj, 0))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)), (v8i16 (VSLLWIL_H_B v16i8:$vj, 0))>;
+
+// Zero extensions
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+ (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+ (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)), (v2i64 (VSLLWIL_DU_WU v4i32:$vj, 0))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+ (v4i32 (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)), (v4i32 (VSLLWIL_WU_HU v8i16:$vj, 0))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)), (v8i16 (VSLLWIL_HU_BU v16i8:$vj, 0))>;
+
+} // Predicates = [IsExtLSX]
+
/// Intrinsic pattern
class deriveLSXIntrinsic<string Inst> {
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
index 953e6c4..8884aac 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
@@ -7,11 +7,7 @@ define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -26,10 +22,7 @@ define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vext2xv.d.h $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -45,9 +38,8 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vslli.d $vr0, $vr0, 32
-; LA32-NEXT: vsrai.d $vr0, $vr0, 32
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.w $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -55,9 +47,7 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
-; LA64-NEXT: vslli.d $vr0, $vr0, 32
-; LA64-NEXT: vsrai.d $vr0, $vr0, 32
+; LA64-NEXT: vext2xv.d.w $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -72,10 +62,7 @@ define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vext2xv.w.b $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -89,13 +76,8 @@ define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_4i8_to_4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
-; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; CHECK-NEXT: xvslli.d $xr0, $xr0, 56
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 56
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -112,9 +94,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.w $vr0, $vr0, 16
-; LA32-NEXT: vsrai.w $vr0, $vr0, 16
+; LA32-NEXT: vext2xv.w.h $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -122,9 +102,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.w $vr0, $vr0, 16
-; LA64-NEXT: vsrai.w $vr0, $vr0, 16
+; LA64-NEXT: vext2xv.w.h $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -139,27 +117,17 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI6_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA32-NEXT: xvslli.d $xr0, $xr0, 48
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.h $xr0, $xr0
; LA32-NEXT: xvst $xr0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_sext_4i16_to_4i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI6_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA64-NEXT: xvslli.d $xr0, $xr0, 48
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.d.h $xr0, $xr0
; LA64-NEXT: xvst $xr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -170,43 +138,12 @@ entry:
}
define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_4i32_to_4i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vextrins.w $vr1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vextrins.w $vr1, $vr0, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr2, $vr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr0, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_4i32_to_4i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_4i32_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
entry:
%A = load <4 x i32>, ptr %ptr
%B = sext <4 x i32> %A to <4 x i64>
@@ -221,9 +158,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.h $vr0, $vr0, 8
-; LA32-NEXT: vsrai.h $vr0, $vr0, 8
+; LA32-NEXT: vext2xv.h.b $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -231,9 +166,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.h $vr0, $vr0, 8
-; LA64-NEXT: vsrai.h $vr0, $vr0, 8
+; LA64-NEXT: vext2xv.h.b $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -248,27 +181,17 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI9_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI9_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA32-NEXT: xvslli.w $xr0, $xr0, 24
-; LA32-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.w.b $xr0, $xr0
; LA32-NEXT: xvst $xr0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_sext_8i8_to_8i32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI9_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI9_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.w $xr0, $xr0, 24
-; LA64-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.w.b $xr0, $xr0
; LA64-NEXT: xvst $xr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -282,21 +205,13 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA32-LABEL: load_sext_8i8_to_8i64:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: xvpermi.d $xr1, $xr0, 68
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vext2xv.d.b $xr1, $xr0
; LA32-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
-; LA32-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA32-NEXT: xvld $xr2, $a2, %pc_lo12(.LCPI10_0)
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr0, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr2
-; LA32-NEXT: xvslli.d $xr0, $xr0, 56
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 56
-; LA32-NEXT: xvshuf.b $xr1, $xr0, $xr1, $xr2
-; LA32-NEXT: xvslli.d $xr1, $xr1, 56
-; LA32-NEXT: xvsrai.d $xr1, $xr1, 56
+; LA32-NEXT: vext2xv.d.b $xr0, $xr0
; LA32-NEXT: xvst $xr1, $a1, 0
; LA32-NEXT: xvst $xr0, $a1, 32
; LA32-NEXT: ret
@@ -304,20 +219,12 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA64-LABEL: load_sext_8i8_to_8i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI10_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vsrli.d $vr2, $vr1, 32
-; LA64-NEXT: xvpermi.d $xr2, $xr2, 68
-; LA64-NEXT: xvshuf.b $xr2, $xr0, $xr2, $xr0
-; LA64-NEXT: xvslli.d $xr2, $xr2, 56
-; LA64-NEXT: xvsrai.d $xr2, $xr2, 56
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.d $xr0, $xr0, 56
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 56
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vsrli.d $vr1, $vr0, 32
+; LA64-NEXT: vext2xv.d.b $xr1, $xr1
+; LA64-NEXT: vext2xv.d.b $xr0, $xr0
; LA64-NEXT: xvst $xr0, $a1, 0
-; LA64-NEXT: xvst $xr2, $a1, 32
+; LA64-NEXT: xvst $xr1, $a1, 32
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -330,32 +237,8 @@ define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_8i16_to_8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
@@ -365,93 +248,16 @@ entry:
}
define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_8i16_to_8i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr0, 3
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr0, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr0, 1
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr0, 7
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr0, 4
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr0, 5
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr3, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; LA32-NEXT: xvpermi.q $xr3, $xr1, 2
-; LA32-NEXT: xvst $xr3, $a1, 32
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_8i16_to_8i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr1, 2
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_8i16_to_8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
+; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
%B = sext <8 x i16> %A to <8 x i64>
@@ -460,73 +266,15 @@ entry:
}
define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_8i32_to_8i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a0, 0
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr1, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr1, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr3, $vr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; LA32-NEXT: vextrins.w $vr3, $vr1, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; LA32-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA32-NEXT: vextrins.w $vr1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vextrins.w $vr1, $vr0, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr2, $vr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr0, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: xvst $xr3, $a1, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_8i32_to_8i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 2
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 3
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 1
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_8i32_to_8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
+; CHECK-NEXT: ret
entry:
%A = load <8 x i32>, ptr %ptr
%B = sext <8 x i32> %A to <8 x i64>
@@ -538,56 +286,8 @@ define void @load_sext_16i8_to_16i16(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_16i8_to_16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -600,58 +300,12 @@ define void @load_sext_16i8_to_16i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_16i8_to_16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr3, $xr1, 2
-; CHECK-NEXT: xvst $xr3, $a1, 32
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -661,171 +315,24 @@ entry:
}
define void @load_sext_16i8_to_16i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_16i8_to_16i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr1, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 2
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 3
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 0
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 1
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 6
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 7
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 4
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 5
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr3, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 10
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 11
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 8
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 9
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 3
-; LA32-NEXT: xvpermi.q $xr4, $xr3, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 14
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 15
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 12
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 13
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 3
-; LA32-NEXT: xvpermi.q $xr5, $xr3, 2
-; LA32-NEXT: xvst $xr5, $a1, 96
-; LA32-NEXT: xvst $xr4, $a1, 64
-; LA32-NEXT: xvst $xr2, $a1, 32
-; LA32-NEXT: xvst $xr0, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_16i8_to_16i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 10
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 11
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 8
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 9
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
-; LA64-NEXT: xvpermi.q $xr4, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 14
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 15
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 12
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 13
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
-; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
-; LA64-NEXT: xvst $xr5, $a1, 96
-; LA64-NEXT: xvst $xr4, $a1, 64
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: xvst $xr1, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_16i8_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1
+; CHECK-NEXT: vext2xv.d.w $xr2, $xr2
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr3, $xr3
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 64
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: xvst $xr2, $a1, 96
+; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
%B = sext <16 x i8> %A to <16 x i64>
@@ -838,58 +345,10 @@ define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
-; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
@@ -899,173 +358,23 @@ entry:
}
define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_16i16_to_16i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr1, $a0, 0
-; LA32-NEXT: xvpermi.q $xr3, $xr1, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 2
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr3, 3
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr3, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr3, 1
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 6
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr3, 7
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr3, 4
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr3, 5
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr4, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 2
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr1, 3
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr1, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr1, 1
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 3
-; LA32-NEXT: xvpermi.q $xr4, $xr3, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 6
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr1, 7
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr1, 4
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr1, 5
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 3
-; LA32-NEXT: xvpermi.q $xr5, $xr3, 2
-; LA32-NEXT: xvst $xr5, $a1, 32
-; LA32-NEXT: xvst $xr4, $a1, 0
-; LA32-NEXT: xvst $xr2, $a1, 96
-; LA32-NEXT: xvst $xr0, $a1, 64
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_16i16_to_16i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 2
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 3
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 0
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr3, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 6
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 7
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 4
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 5
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
-; LA64-NEXT: xvpermi.q $xr4, $xr3, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
-; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
-; LA64-NEXT: xvst $xr5, $a1, 32
-; LA64-NEXT: xvst $xr3, $a1, 0
-; LA64-NEXT: xvst $xr4, $a1, 96
-; LA64-NEXT: xvst $xr1, $a1, 64
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_16i16_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1
+; CHECK-NEXT: vext2xv.d.w $xr2, $xr2
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr3, $xr3
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 64
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: xvst $xr2, $a1, 96
+; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
%B = sext <16 x i16> %A to <16 x i64>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
index f0548cc..5269202 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
@@ -7,10 +7,7 @@ define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -25,9 +22,7 @@ define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -42,9 +37,9 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vrepli.b $vr0, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.du.wu $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -52,8 +47,7 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT: vext2xv.du.wu $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -68,9 +62,7 @@ define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -85,24 +77,8 @@ define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: xvrepli.b $xr1, 0
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18
-; CHECK-NEXT: xvextrins.b $xr1, $xr2, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18
-; CHECK-NEXT: xvextrins.b $xr1, $xr2, 136
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48
-; CHECK-NEXT: xvextrins.b $xr1, $xr2, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
-; CHECK-NEXT: xvextrins.b $xr1, $xr0, 136
-; CHECK-NEXT: xvst $xr1, $a1, 0
+; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <4 x i8>, ptr %ptr
@@ -118,8 +94,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT: vext2xv.wu.hu $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -127,8 +102,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT: vext2xv.wu.hu $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -145,48 +119,16 @@ define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.h $xr1, $xr2, 68
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA32-NEXT: xvreplgr2vr.h $xr0, $a0
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.h $xr1, $xr0, 68
-; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: vext2xv.du.hu $xr0, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_zext_4i16_to_4i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: xvrepli.b $xr1, 0
-; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.h $xr1, $xr2, 68
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: xvreplgr2vr.h $xr0, $a0
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA64-NEXT: xvextrins.h $xr1, $xr0, 68
-; LA64-NEXT: xvst $xr1, $a1, 0
+; LA64-NEXT: vext2xv.du.hu $xr0, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
; LA64-NEXT: ret
entry:
%A = load <4 x i16>, ptr %ptr
@@ -196,39 +138,12 @@ entry:
}
define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_4i32_to_4i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6
-; LA32-NEXT: xvst $xr1, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_4i32_to_4i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_4i32_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
entry:
%A = load <4 x i32>, ptr %ptr
%B = zext <4 x i32> %A to <4 x i64>
@@ -243,8 +158,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT: vext2xv.hu.bu $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -252,8 +166,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT: vext2xv.hu.bu $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -270,80 +183,16 @@ define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 204
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 4
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 5
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 6
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 7
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr0, 204
-; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: vext2xv.wu.bu $xr0, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_zext_8i8_to_8i32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA64-NEXT: xvrepli.b $xr1, 0
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 204
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7
-; LA64-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr0, 204
-; LA64-NEXT: xvst $xr1, $a1, 0
+; LA64-NEXT: vext2xv.wu.bu $xr0, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -356,46 +205,15 @@ define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA32-LABEL: load_zext_8i8_to_8i64:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vpickve2gr.b $a2, $vr0, 0
-; LA32-NEXT: vpickve2gr.b $a3, $vr0, 1
-; LA32-NEXT: vpickve2gr.b $a4, $vr0, 2
-; LA32-NEXT: vpickve2gr.b $a5, $vr0, 3
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vext2xv.du.bu $xr1, $xr0
+; LA32-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvori.b $xr3, $xr1, 0
-; LA32-NEXT: xvextrins.b $xr3, $xr2, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA32-NEXT: xvreplgr2vr.b $xr4, $a0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr3, 18
-; LA32-NEXT: xvextrins.b $xr3, $xr2, 136
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a2
-; LA32-NEXT: xvpermi.q $xr4, $xr3, 48
-; LA32-NEXT: xvextrins.b $xr3, $xr4, 0
-; LA32-NEXT: xvreplgr2vr.b $xr4, $a3
-; LA32-NEXT: xvpermi.q $xr0, $xr3, 48
-; LA32-NEXT: xvextrins.b $xr3, $xr0, 136
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA32-NEXT: xvpermi.q $xr4, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr4, 136
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a4
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr0, 0
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a5
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr0, 136
+; LA32-NEXT: vext2xv.du.bu $xr0, $xr0
; LA32-NEXT: xvst $xr1, $a1, 0
-; LA32-NEXT: xvst $xr3, $a1, 32
+; LA32-NEXT: xvst $xr0, $a1, 32
; LA32-NEXT: ret
;
; LA64-LABEL: load_zext_8i8_to_8i64:
@@ -403,42 +221,10 @@ define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
; LA64-NEXT: vsrli.d $vr1, $vr0, 32
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 0
-; LA64-NEXT: xvrepli.b $xr2, 0
-; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 18
-; LA64-NEXT: xvori.b $xr4, $xr2, 0
-; LA64-NEXT: xvextrins.b $xr4, $xr3, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 1
-; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
-; LA64-NEXT: xvpermi.q $xr3, $xr4, 18
-; LA64-NEXT: xvextrins.b $xr4, $xr3, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 2
-; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
-; LA64-NEXT: xvpermi.q $xr3, $xr4, 48
-; LA64-NEXT: xvextrins.b $xr4, $xr3, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 3
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr4, 48
-; LA64-NEXT: xvextrins.b $xr4, $xr1, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 18
-; LA64-NEXT: xvextrins.b $xr2, $xr1, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 18
-; LA64-NEXT: xvextrins.b $xr2, $xr1, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 48
-; LA64-NEXT: xvextrins.b $xr2, $xr1, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA64-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA64-NEXT: xvpermi.q $xr0, $xr2, 48
-; LA64-NEXT: xvextrins.b $xr2, $xr0, 136
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: xvst $xr4, $a1, 32
+; LA64-NEXT: vext2xv.du.bu $xr1, $xr1
+; LA64-NEXT: vext2xv.du.bu $xr0, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: xvst $xr1, $a1, 32
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -448,67 +234,12 @@ entry:
}

define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_8i16_to_8i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_8i16_to_8i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_8i16_to_8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
%B = zext <8 x i16> %A to <8 x i32>
@@ -517,79 +248,16 @@ entry:
}

define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_8i16_to_8i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvori.b $xr2, $xr1, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6
-; LA32-NEXT: xvst $xr1, $a1, 32
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_8i16_to_8i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr1, 2
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_8i16_to_8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
+; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
%B = zext <8 x i16> %A to <8 x i64>
@@ -598,65 +266,15 @@ entry:
}

define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_8i32_to_8i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a0, 0
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0
-; LA32-NEXT: xvori.b $xr3, $xr2, 0
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 1
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 2
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 3
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: xvst $xr3, $a1, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_8i32_to_8i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_8i32_to_8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
+; CHECK-NEXT: ret
entry:
%A = load <8 x i32>, ptr %ptr
%B = zext <8 x i32> %A to <8 x i64>
@@ -668,56 +286,8 @@ define void @load_zext_16i8_to_16i16(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_16i8_to_16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -730,58 +300,12 @@ define void @load_zext_16i8_to_16i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_16i8_to_16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr3, $xr1, 2
-; CHECK-NEXT: xvst $xr3, $a1, 32
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1
+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -791,143 +315,24 @@ entry:
}

define void @load_zext_16i8_to_16i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_16i8_to_16i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr1, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 0
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvori.b $xr0, $xr2, 0
-; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 1
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 2
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 4
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 3
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 6
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 4
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvori.b $xr3, $xr2, 0
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 5
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 6
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 7
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 8
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvori.b $xr4, $xr2, 0
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 9
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 10
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 4
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 11
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 6
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 12
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 13
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 14
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 15
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6
-; LA32-NEXT: xvst $xr2, $a1, 96
-; LA32-NEXT: xvst $xr4, $a1, 64
-; LA32-NEXT: xvst $xr3, $a1, 32
-; LA32-NEXT: xvst $xr0, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_16i8_to_16i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 10
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 11
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 8
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 9
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
-; LA64-NEXT: xvpermi.q $xr4, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 14
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 15
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 12
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 13
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
-; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
-; LA64-NEXT: xvst $xr5, $a1, 96
-; LA64-NEXT: xvst $xr4, $a1, 64
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: xvst $xr1, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_16i8_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1
+; CHECK-NEXT: vext2xv.du.wu $xr2, $xr2
+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1
+; CHECK-NEXT: vext2xv.du.wu $xr3, $xr3
+; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 64
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: xvst $xr2, $a1, 96
+; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
%B = zext <16 x i8> %A to <16 x i64>
@@ -936,121 +341,15 @@ entry:
}

define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_16i16_to_16i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a0, 0
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 4
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 5
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 6
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 7
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 1
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 2
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 3
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; LA32-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: xvst $xr3, $a1, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_16i16_to_16i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 4
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 5
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 6
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 7
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr1, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_16i16_to_16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1
+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
+; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
%B = zext <16 x i16> %A to <16 x i32>
@@ -1059,145 +358,23 @@ entry:
}

define void @load_zext_16i16_to_16i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_16i16_to_16i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a0, 0
-; LA32-NEXT: xvpermi.q $xr3, $xr0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 0
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvori.b $xr1, $xr2, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 1
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 2
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 3
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 4
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvori.b $xr4, $xr2, 0
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 5
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 6
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 4
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 7
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 6
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvori.b $xr3, $xr2, 0
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6
-; LA32-NEXT: xvst $xr2, $a1, 32
-; LA32-NEXT: xvst $xr3, $a1, 0
-; LA32-NEXT: xvst $xr4, $a1, 96
-; LA32-NEXT: xvst $xr1, $a1, 64
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_16i16_to_16i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr3, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 6
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 7
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 4
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 5
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
-; LA64-NEXT: xvpermi.q $xr4, $xr3, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
-; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
-; LA64-NEXT: xvst $xr5, $a1, 32
-; LA64-NEXT: xvst $xr3, $a1, 0
-; LA64-NEXT: xvst $xr4, $a1, 96
-; LA64-NEXT: xvst $xr1, $a1, 64
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_16i16_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1
+; CHECK-NEXT: vext2xv.du.wu $xr2, $xr2
+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1
+; CHECK-NEXT: vext2xv.du.wu $xr3, $xr3
+; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 64
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: xvst $xr2, $a1, 96
+; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
%B = zext <16 x i16> %A to <16 x i64>
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
index cadaf2f..c78de80 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
@@ -7,11 +7,9 @@ define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -26,10 +24,8 @@ define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -45,9 +41,8 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vslli.d $vr0, $vr0, 32
-; LA32-NEXT: vsrai.d $vr0, $vr0, 32
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vsllwil.d.w $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -55,9 +50,7 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
-; LA64-NEXT: vslli.d $vr0, $vr0, 32
-; LA64-NEXT: vsrai.d $vr0, $vr0, 32
+; LA64-NEXT: vsllwil.d.w $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -72,10 +65,8 @@ define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -90,16 +81,15 @@ define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr1, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr1, $vr1, 56
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 56
-; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr1, $a1, 0
+; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <4 x i8>, ptr %ptr
@@ -115,9 +105,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.w $vr0, $vr0, 16
-; LA32-NEXT: vsrai.w $vr0, $vr0, 16
+; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -125,9 +113,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.w $vr0, $vr0, 16
-; LA64-NEXT: vsrai.w $vr0, $vr0, 16
+; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -141,17 +127,13 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
; LA32-LABEL: load_sext_4i16_to_4i64:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA32-NEXT: vilvl.w $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.d $vr0, $vr0, 48
-; LA32-NEXT: vsrai.d $vr0, $vr0, 48
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.d.w $vr0, $vr0, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: vilvl.h $vr1, $vr1, $vr1
-; LA32-NEXT: vilvl.w $vr1, $vr1, $vr1
-; LA32-NEXT: vslli.d $vr1, $vr1, 48
-; LA32-NEXT: vsrai.d $vr1, $vr1, 48
+; LA32-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.d.w $vr1, $vr1, 0
; LA32-NEXT: vst $vr1, $a1, 16
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
@@ -160,15 +142,13 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA64-NEXT: vilvl.w $vr1, $vr0, $vr0
-; LA64-NEXT: vslli.d $vr1, $vr1, 48
-; LA64-NEXT: vsrai.d $vr1, $vr1, 48
-; LA64-NEXT: vilvh.w $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.d $vr0, $vr0, 48
-; LA64-NEXT: vsrai.d $vr0, $vr0, 48
-; LA64-NEXT: vst $vr0, $a1, 16
-; LA64-NEXT: vst $vr1, $a1, 0
+; LA64-NEXT: vshuf4i.h $vr1, $vr0, 14
+; LA64-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: vst $vr1, $a1, 16
; LA64-NEXT: ret
entry:
%A = load <4 x i16>, ptr %ptr
@@ -181,14 +161,11 @@ define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_4i32_to_4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 16
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 50
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr1, $a1, 0
+; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <4 x i32>, ptr %ptr
@@ -204,9 +181,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.h $vr0, $vr0, 8
-; LA32-NEXT: vsrai.h $vr0, $vr0, 8
+; LA32-NEXT: vsllwil.h.b $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -214,9 +189,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.h $vr0, $vr0, 8
-; LA64-NEXT: vsrai.h $vr0, $vr0, 8
+; LA64-NEXT: vsllwil.h.b $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -230,17 +203,13 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
; LA32-LABEL: load_sext_8i8_to_8i32:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.w $vr0, $vr0, 24
-; LA32-NEXT: vsrai.w $vr0, $vr0, 24
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: vilvl.b $vr1, $vr1, $vr1
-; LA32-NEXT: vilvl.h $vr1, $vr1, $vr1
-; LA32-NEXT: vslli.w $vr1, $vr1, 24
-; LA32-NEXT: vsrai.w $vr1, $vr1, 24
+; LA32-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.w.h $vr1, $vr1, 0
; LA32-NEXT: vst $vr1, $a1, 16
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
@@ -249,15 +218,13 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT: vilvl.h $vr1, $vr0, $vr0
-; LA64-NEXT: vslli.w $vr1, $vr1, 24
-; LA64-NEXT: vsrai.w $vr1, $vr1, 24
-; LA64-NEXT: vilvh.h $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.w $vr0, $vr0, 24
-; LA64-NEXT: vsrai.w $vr0, $vr0, 24
-; LA64-NEXT: vst $vr0, $a1, 16
-; LA64-NEXT: vst $vr1, $a1, 0
+; LA64-NEXT: vsrli.d $vr1, $vr0, 32
+; LA64-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: vst $vr1, $a1, 16
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -273,50 +240,50 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT: vilvl.h $vr1, $vr0, $vr0
-; LA32-NEXT: vilvl.w $vr2, $vr1, $vr1
-; LA32-NEXT: vslli.d $vr2, $vr2, 56
-; LA32-NEXT: vsrai.d $vr2, $vr2, 56
-; LA32-NEXT: vilvh.w $vr1, $vr1, $vr1
-; LA32-NEXT: vslli.d $vr1, $vr1, 56
-; LA32-NEXT: vsrai.d $vr1, $vr1, 56
-; LA32-NEXT: vilvh.h $vr0, $vr0, $vr0
-; LA32-NEXT: vilvl.w $vr3, $vr0, $vr0
-; LA32-NEXT: vslli.d $vr3, $vr3, 56
-; LA32-NEXT: vsrai.d $vr3, $vr3, 56
-; LA32-NEXT: vilvh.w $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.d $vr0, $vr0, 56
-; LA32-NEXT: vsrai.d $vr0, $vr0, 56
-; LA32-NEXT: vst $vr0, $a1, 48
-; LA32-NEXT: vst $vr3, $a1, 32
+; LA32-NEXT: vshuf4i.b $vr1, $vr0, 14
+; LA32-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; LA32-NEXT: vsrli.d $vr2, $vr0, 32
+; LA32-NEXT: vsllwil.h.b $vr2, $vr2, 0
+; LA32-NEXT: vsllwil.w.h $vr2, $vr2, 0
+; LA32-NEXT: vsllwil.d.w $vr2, $vr2, 0
+; LA32-NEXT: vsrli.d $vr3, $vr0, 48
+; LA32-NEXT: vsllwil.h.b $vr3, $vr3, 0
+; LA32-NEXT: vsllwil.w.h $vr3, $vr3, 0
+; LA32-NEXT: vsllwil.d.w $vr3, $vr3, 0
+; LA32-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: vst $vr3, $a1, 48
+; LA32-NEXT: vst $vr2, $a1, 32
; LA32-NEXT: vst $vr1, $a1, 16
-; LA32-NEXT: vst $vr2, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_sext_8i8_to_8i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT: vilvl.h $vr1, $vr0, $vr0
-; LA64-NEXT: vilvl.w $vr2, $vr1, $vr1
-; LA64-NEXT: vslli.d $vr2, $vr2, 56
-; LA64-NEXT: vsrai.d $vr2, $vr2, 56
-; LA64-NEXT: vilvh.w $vr1, $vr1, $vr1
-; LA64-NEXT: vslli.d $vr1, $vr1, 56
-; LA64-NEXT: vsrai.d $vr1, $vr1, 56
-; LA64-NEXT: vilvh.h $vr0, $vr0, $vr0
-; LA64-NEXT: vilvl.w $vr3, $vr0, $vr0
-; LA64-NEXT: vslli.d $vr3, $vr3, 56
-; LA64-NEXT: vsrai.d $vr3, $vr3, 56
-; LA64-NEXT: vilvh.w $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.d $vr0, $vr0, 56
-; LA64-NEXT: vsrai.d $vr0, $vr0, 56
-; LA64-NEXT: vst $vr0, $a1, 48
-; LA64-NEXT: vst $vr3, $a1, 32
+; LA64-NEXT: vshuf4i.b $vr1, $vr0, 14
+; LA64-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; LA64-NEXT: vsrli.d $vr2, $vr0, 32
+; LA64-NEXT: vsllwil.h.b $vr2, $vr2, 0
+; LA64-NEXT: vsllwil.w.h $vr2, $vr2, 0
+; LA64-NEXT: vsllwil.d.w $vr2, $vr2, 0
+; LA64-NEXT: vsrli.d $vr3, $vr0, 48
+; LA64-NEXT: vsllwil.h.b $vr3, $vr3, 0
+; LA64-NEXT: vsllwil.w.h $vr3, $vr3, 0
+; LA64-NEXT: vsllwil.d.w $vr3, $vr3, 0
+; LA64-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: vst $vr3, $a1, 48
+; LA64-NEXT: vst $vr2, $a1, 32
; LA64-NEXT: vst $vr1, $a1, 16
-; LA64-NEXT: vst $vr2, $a1, 0
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -329,14 +296,11 @@ define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_8i16_to_8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr1, $a1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
@@ -349,24 +313,21 @@ define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_8i16_to_8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr1
-; CHECK-NEXT: vslli.d $vr2, $vr2, 48
-; CHECK-NEXT: vsrai.d $vr2, $vr2, 48
-; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1
-; CHECK-NEXT: vslli.d $vr1, $vr1, 48
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 48
-; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr3, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr3, $vr3, 48
-; CHECK-NEXT: vsrai.d $vr3, $vr3, 48
-; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
-; CHECK-NEXT: vst $vr0, $a1, 48
-; CHECK-NEXT: vst $vr3, $a1, 32
+; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr0, 12
+; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 32
; CHECK-NEXT: vst $vr1, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
@@ -380,22 +341,16 @@ define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vld $vr1, $a0, 16
-; CHECK-NEXT: vshuf4i.w $vr2, $vr0, 16
-; CHECK-NEXT: vslli.d $vr2, $vr2, 32
-; CHECK-NEXT: vsrai.d $vr2, $vr2, 32
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 50
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vshuf4i.w $vr3, $vr1, 16
-; CHECK-NEXT: vslli.d $vr3, $vr3, 32
-; CHECK-NEXT: vsrai.d $vr3, $vr3, 32
-; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 50
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vst $vr1, $a1, 48
-; CHECK-NEXT: vst $vr3, $a1, 32
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
+; CHECK-NEXT: vshuf4i.w $vr2, $vr0, 14
+; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0
+; CHECK-NEXT: vshuf4i.w $vr3, $vr1, 14
+; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; CHECK-NEXT: vst $vr1, $a1, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <8 x i32>, ptr %ptr
@@ -408,14 +363,11 @@ define void @load_sext_16i8_to_16i16(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_16i8_to_16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr1, $vr0, $vr0
-; CHECK-NEXT: vslli.h $vr1, $vr1, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr1, $a1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -428,24 +380,21 @@ define void @load_sext_16i8_to_16i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_16i8_to_16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr1, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr1
-; CHECK-NEXT: vslli.w $vr2, $vr2, 24
-; CHECK-NEXT: vsrai.w $vr2, $vr2, 24
-; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1
-; CHECK-NEXT: vslli.w $vr1, $vr1, 24
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 24
-; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr3, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr3, $vr3, 24
-; CHECK-NEXT: vsrai.w $vr3, $vr3, 24
-; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
-; CHECK-NEXT: vst $vr0, $a1, 48
-; CHECK-NEXT: vst $vr3, $a1, 32
+; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
+; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT: vsllwil.h.b $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr0, 12
+; CHECK-NEXT: vsllwil.h.b $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 32
; CHECK-NEXT: vst $vr1, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -458,44 +407,45 @@ define void @load_sext_16i8_to_16i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_16i8_to_16i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr1, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr1
-; CHECK-NEXT: vilvl.w $vr3, $vr2, $vr2
-; CHECK-NEXT: vslli.d $vr3, $vr3, 56
-; CHECK-NEXT: vsrai.d $vr3, $vr3, 56
-; CHECK-NEXT: vilvh.w $vr2, $vr2, $vr2
-; CHECK-NEXT: vslli.d $vr2, $vr2, 56
-; CHECK-NEXT: vsrai.d $vr2, $vr2, 56
-; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1
-; CHECK-NEXT: vilvl.w $vr4, $vr1, $vr1
-; CHECK-NEXT: vslli.d $vr4, $vr4, 56
-; CHECK-NEXT: vsrai.d $vr4, $vr4, 56
-; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1
-; CHECK-NEXT: vslli.d $vr1, $vr1, 56
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 56
-; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr5, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr6, $vr5, $vr5
-; CHECK-NEXT: vslli.d $vr6, $vr6, 56
-; CHECK-NEXT: vsrai.d $vr6, $vr6, 56
-; CHECK-NEXT: vilvh.w $vr5, $vr5, $vr5
-; CHECK-NEXT: vslli.d $vr5, $vr5, 56
-; CHECK-NEXT: vsrai.d $vr5, $vr5, 56
-; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr7, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr7, $vr7, 56
-; CHECK-NEXT: vsrai.d $vr7, $vr7, 56
-; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
-; CHECK-NEXT: vst $vr0, $a1, 112
-; CHECK-NEXT: vst $vr7, $a1, 96
+; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr2, $vr0, 32
+; CHECK-NEXT: vsllwil.h.b $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0
+; CHECK-NEXT: vsrli.d $vr3, $vr0, 48
+; CHECK-NEXT: vsllwil.h.b $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0
+; CHECK-NEXT: vbsrl.v $vr4, $vr0, 8
+; CHECK-NEXT: vsllwil.h.b $vr4, $vr4, 0
+; CHECK-NEXT: vsllwil.w.h $vr4, $vr4, 0
+; CHECK-NEXT: vsllwil.d.w $vr4, $vr4, 0
+; CHECK-NEXT: vbsrl.v $vr5, $vr0, 10
+; CHECK-NEXT: vsllwil.h.b $vr5, $vr5, 0
+; CHECK-NEXT: vsllwil.w.h $vr5, $vr5, 0
+; CHECK-NEXT: vsllwil.d.w $vr5, $vr5, 0
+; CHECK-NEXT: vbsrl.v $vr6, $vr0, 12
+; CHECK-NEXT: vsllwil.h.b $vr6, $vr6, 0
+; CHECK-NEXT: vsllwil.w.h $vr6, $vr6, 0
+; CHECK-NEXT: vsllwil.d.w $vr6, $vr6, 0
+; CHECK-NEXT: vbsrl.v $vr7, $vr0, 14
+; CHECK-NEXT: vsllwil.h.b $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.w.h $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.d.w $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr7, $a1, 112
+; CHECK-NEXT: vst $vr6, $a1, 96
; CHECK-NEXT: vst $vr5, $a1, 80
-; CHECK-NEXT: vst $vr6, $a1, 64
-; CHECK-NEXT: vst $vr1, $a1, 48
-; CHECK-NEXT: vst $vr4, $a1, 32
-; CHECK-NEXT: vst $vr2, $a1, 16
-; CHECK-NEXT: vst $vr3, $a1, 0
+; CHECK-NEXT: vst $vr4, $a1, 64
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 32
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -509,22 +459,16 @@ define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vld $vr1, $a0, 16
-; CHECK-NEXT: vilvl.h $vr2, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr2, $vr2, 16
-; CHECK-NEXT: vsrai.w $vr2, $vr2, 16
-; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vilvl.h $vr3, $vr1, $vr1
-; CHECK-NEXT: vslli.w $vr3, $vr3, 16
-; CHECK-NEXT: vsrai.w $vr3, $vr3, 16
-; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vst $vr1, $a1, 48
-; CHECK-NEXT: vst $vr3, $a1, 32
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
+; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr1, 8
+; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; CHECK-NEXT: vst $vr1, $a1, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
@@ -538,42 +482,36 @@ define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vld $vr1, $a0, 16
-; CHECK-NEXT: vilvl.h $vr2, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr3, $vr2, $vr2
-; CHECK-NEXT: vslli.d $vr3, $vr3, 48
-; CHECK-NEXT: vsrai.d $vr3, $vr3, 48
-; CHECK-NEXT: vilvh.w $vr2, $vr2, $vr2
-; CHECK-NEXT: vslli.d $vr2, $vr2, 48
-; CHECK-NEXT: vsrai.d $vr2, $vr2, 48
-; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr4, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr4, $vr4, 48
-; CHECK-NEXT: vsrai.d $vr4, $vr4, 48
-; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
-; CHECK-NEXT: vilvl.h $vr5, $vr1, $vr1
-; CHECK-NEXT: vilvl.w $vr6, $vr5, $vr5
-; CHECK-NEXT: vslli.d $vr6, $vr6, 48
-; CHECK-NEXT: vsrai.d $vr6, $vr6, 48
-; CHECK-NEXT: vilvh.w $vr5, $vr5, $vr5
-; CHECK-NEXT: vslli.d $vr5, $vr5, 48
-; CHECK-NEXT: vsrai.d $vr5, $vr5, 48
-; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1
-; CHECK-NEXT: vilvl.w $vr7, $vr1, $vr1
-; CHECK-NEXT: vslli.d $vr7, $vr7, 48
-; CHECK-NEXT: vsrai.d $vr7, $vr7, 48
-; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1
-; CHECK-NEXT: vslli.d $vr1, $vr1, 48
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 48
-; CHECK-NEXT: vst $vr1, $a1, 112
-; CHECK-NEXT: vst $vr7, $a1, 96
+; CHECK-NEXT: vshuf4i.h $vr2, $vr0, 14
+; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr0, 8
+; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0
+; CHECK-NEXT: vbsrl.v $vr4, $vr0, 12
+; CHECK-NEXT: vsllwil.w.h $vr4, $vr4, 0
+; CHECK-NEXT: vsllwil.d.w $vr4, $vr4, 0
+; CHECK-NEXT: vshuf4i.h $vr5, $vr1, 14
+; CHECK-NEXT: vsllwil.w.h $vr5, $vr5, 0
+; CHECK-NEXT: vsllwil.d.w $vr5, $vr5, 0
+; CHECK-NEXT: vbsrl.v $vr6, $vr1, 8
+; CHECK-NEXT: vsllwil.w.h $vr6, $vr6, 0
+; CHECK-NEXT: vsllwil.d.w $vr6, $vr6, 0
+; CHECK-NEXT: vbsrl.v $vr7, $vr1, 12
+; CHECK-NEXT: vsllwil.w.h $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.d.w $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0
+; CHECK-NEXT: vst $vr1, $a1, 64
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr7, $a1, 112
+; CHECK-NEXT: vst $vr6, $a1, 96
; CHECK-NEXT: vst $vr5, $a1, 80
-; CHECK-NEXT: vst $vr6, $a1, 64
-; CHECK-NEXT: vst $vr0, $a1, 48
-; CHECK-NEXT: vst $vr4, $a1, 32
+; CHECK-NEXT: vst $vr4, $a1, 48
+; CHECK-NEXT: vst $vr3, $a1, 32
; CHECK-NEXT: vst $vr2, $a1, 16
-; CHECK-NEXT: vst $vr3, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
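
For reference, the updated load_sext_* cases above all share one shape: a narrow vector load, a sext, and a store of the widened result. A minimal sketch in that style (the function name and the <4 x i8> to <4 x i32> widths here are illustrative; the full test bodies are elided by the diff context):

; Sign extension now selects the vsllwil.h.b / vsllwil.w.h / vsllwil.d.w
; widening instructions instead of vilvl interleaves followed by
; vslli/vsrai shift pairs.
define void @sext_4i8_to_4i32_sketch(ptr %ptr, ptr %dst) {
entry:
  %a = load <4 x i8>, ptr %ptr               ; narrow <4 x i8> input
  %b = sext <4 x i8> %a to <4 x i32>         ; per-lane sign extension
  store <4 x i32> %b, ptr %dst
  ret void
}
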
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll
index 5e0ff9a..ee1374a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll
@@ -7,10 +7,9 @@ define void @shuffle_sign_ext_2i8_to_2i64(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%x = load <2 x i8>, ptr %ptr
@@ -25,9 +24,8 @@ define void @shuffle_sign_ext_2i16_to_2i64(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%x = load <2 x i16>, ptr %ptr
@@ -42,9 +40,9 @@ define void @shuffle_sign_ext_2i32_to_2i64(ptr %ptr, ptr %dst) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vrepli.b $vr0, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -52,8 +50,7 @@ define void @shuffle_sign_ext_2i32_to_2i64(ptr %ptr, ptr %dst) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
%x = load <2 x i32>, ptr %ptr
@@ -68,9 +65,8 @@ define void @shuffle_sign_ext_4i8_to_4i32(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%x = load <4 x i8>, ptr %ptr
@@ -87,8 +83,7 @@ define void @shuffle_sign_ext_4i16_to_4i32(ptr %ptr, ptr %dst) nounwind {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -96,8 +91,7 @@ define void @shuffle_sign_ext_4i16_to_4i32(ptr %ptr, ptr %dst) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
%x = load <4 x i16>, ptr %ptr
@@ -114,8 +108,7 @@ define void @shuffle_sign_ext_8i8_to_8i16(ptr %ptr, ptr %dst) nounwind {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -123,8 +116,7 @@ define void @shuffle_sign_ext_8i8_to_8i16(ptr %ptr, ptr %dst) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
%x = load <8 x i8>, ptr %ptr
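
The zero-extension tests in the next file follow the same load/extend/store shape, with zext in place of sext. A minimal sketch (widths illustrative): the unsigned vsllwil.hu.bu / vsllwil.wu.hu / vsllwil.du.wu forms replace interleaving against a vrepli.b zero register, so the zero materialization disappears entirely.

define void @zext_2i16_to_2i64_sketch(ptr %ptr, ptr %dst) {
entry:
  %a = load <2 x i16>, ptr %ptr              ; narrow <2 x i16> input
  %b = zext <2 x i16> %a to <2 x i64>        ; per-lane zero extension
  store <2 x i64> %b, ptr %dst
  ret void
}
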
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
index 2ace0bf..7156e61 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
@@ -7,10 +7,9 @@ define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -25,9 +24,8 @@ define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -42,9 +40,9 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vrepli.b $vr0, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -52,8 +50,7 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT: vsllwil.du.wu $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -68,9 +65,8 @@ define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -85,13 +81,15 @@ define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr0
-; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
+; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <4 x i8>, ptr %ptr
@@ -107,8 +105,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -116,8 +113,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -131,14 +127,13 @@ define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
; LA32-LABEL: load_zext_4i16_to_4i64:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0
-; LA32-NEXT: vilvl.w $vr0, $vr1, $vr0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: vilvl.h $vr2, $vr1, $vr2
-; LA32-NEXT: vilvl.w $vr1, $vr1, $vr2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.du.wu $vr1, $vr1, 0
; LA32-NEXT: vst $vr1, $a1, 16
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
@@ -147,12 +142,13 @@ define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0
-; LA64-NEXT: vilvl.w $vr2, $vr1, $vr0
-; LA64-NEXT: vilvh.w $vr0, $vr1, $vr0
-; LA64-NEXT: vst $vr0, $a1, 16
-; LA64-NEXT: vst $vr2, $a1, 0
+; LA64-NEXT: vshuf4i.h $vr1, $vr0, 14
+; LA64-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: vst $vr1, $a1, 16
; LA64-NEXT: ret
entry:
%A = load <4 x i16>, ptr %ptr
@@ -165,11 +161,11 @@ define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_4i32_to_4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr0
-; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
+; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <4 x i32>, ptr %ptr
@@ -185,8 +181,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -194,8 +189,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -209,14 +203,13 @@ define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
; LA32-LABEL: load_zext_8i8_to_8i32:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0
-; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: vilvl.b $vr2, $vr1, $vr2
-; LA32-NEXT: vilvl.h $vr1, $vr1, $vr2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
; LA32-NEXT: vst $vr1, $a1, 16
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
@@ -225,12 +218,13 @@ define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0
-; LA64-NEXT: vilvl.h $vr2, $vr1, $vr0
-; LA64-NEXT: vilvh.h $vr0, $vr1, $vr0
-; LA64-NEXT: vst $vr0, $a1, 16
-; LA64-NEXT: vst $vr2, $a1, 0
+; LA64-NEXT: vsrli.d $vr1, $vr0, 32
+; LA64-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: vst $vr1, $a1, 16
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -246,36 +240,50 @@ define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0
-; LA32-NEXT: vilvl.h $vr2, $vr1, $vr0
-; LA32-NEXT: vilvl.w $vr3, $vr1, $vr2
-; LA32-NEXT: vilvh.w $vr2, $vr1, $vr2
-; LA32-NEXT: vilvh.h $vr0, $vr1, $vr0
-; LA32-NEXT: vilvl.w $vr4, $vr1, $vr0
-; LA32-NEXT: vilvh.w $vr0, $vr1, $vr0
-; LA32-NEXT: vst $vr0, $a1, 48
-; LA32-NEXT: vst $vr4, $a1, 32
-; LA32-NEXT: vst $vr2, $a1, 16
-; LA32-NEXT: vst $vr3, $a1, 0
+; LA32-NEXT: vshuf4i.b $vr1, $vr0, 14
+; LA32-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; LA32-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; LA32-NEXT: vsrli.d $vr2, $vr0, 32
+; LA32-NEXT: vsllwil.hu.bu $vr2, $vr2, 0
+; LA32-NEXT: vsllwil.wu.hu $vr2, $vr2, 0
+; LA32-NEXT: vsllwil.du.wu $vr2, $vr2, 0
+; LA32-NEXT: vsrli.d $vr3, $vr0, 48
+; LA32-NEXT: vsllwil.hu.bu $vr3, $vr3, 0
+; LA32-NEXT: vsllwil.wu.hu $vr3, $vr3, 0
+; LA32-NEXT: vsllwil.du.wu $vr3, $vr3, 0
+; LA32-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; LA32-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: vst $vr3, $a1, 48
+; LA32-NEXT: vst $vr2, $a1, 32
+; LA32-NEXT: vst $vr1, $a1, 16
; LA32-NEXT: ret
;
; LA64-LABEL: load_zext_8i8_to_8i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0
-; LA64-NEXT: vilvl.h $vr2, $vr1, $vr0
-; LA64-NEXT: vilvl.w $vr3, $vr1, $vr2
-; LA64-NEXT: vilvh.w $vr2, $vr1, $vr2
-; LA64-NEXT: vilvh.h $vr0, $vr1, $vr0
-; LA64-NEXT: vilvl.w $vr4, $vr1, $vr0
-; LA64-NEXT: vilvh.w $vr0, $vr1, $vr0
-; LA64-NEXT: vst $vr0, $a1, 48
-; LA64-NEXT: vst $vr4, $a1, 32
-; LA64-NEXT: vst $vr2, $a1, 16
-; LA64-NEXT: vst $vr3, $a1, 0
+; LA64-NEXT: vshuf4i.b $vr1, $vr0, 14
+; LA64-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; LA64-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; LA64-NEXT: vsrli.d $vr2, $vr0, 32
+; LA64-NEXT: vsllwil.hu.bu $vr2, $vr2, 0
+; LA64-NEXT: vsllwil.wu.hu $vr2, $vr2, 0
+; LA64-NEXT: vsllwil.du.wu $vr2, $vr2, 0
+; LA64-NEXT: vsrli.d $vr3, $vr0, 48
+; LA64-NEXT: vsllwil.hu.bu $vr3, $vr3, 0
+; LA64-NEXT: vsllwil.wu.hu $vr3, $vr3, 0
+; LA64-NEXT: vsllwil.du.wu $vr3, $vr3, 0
+; LA64-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; LA64-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: vst $vr3, $a1, 48
+; LA64-NEXT: vst $vr2, $a1, 32
+; LA64-NEXT: vst $vr1, $a1, 16
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -288,11 +296,11 @@ define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_8i16_to_8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr0
-; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
@@ -305,17 +313,21 @@ define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_8i16_to_8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr3, $vr1, $vr2
-; CHECK-NEXT: vilvh.w $vr2, $vr1, $vr2
-; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr4, $vr1, $vr0
-; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 48
-; CHECK-NEXT: vst $vr4, $a1, 32
-; CHECK-NEXT: vst $vr2, $a1, 16
-; CHECK-NEXT: vst $vr3, $a1, 0
+; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT: vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr0, 12
+; CHECK-NEXT: vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 32
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <8 x i16>, ptr %ptr
@@ -329,15 +341,16 @@ define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vld $vr1, $a0, 16
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vilvl.w $vr3, $vr2, $vr0
-; CHECK-NEXT: vilvh.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vilvl.w $vr4, $vr2, $vr1
-; CHECK-NEXT: vilvh.w $vr1, $vr2, $vr1
-; CHECK-NEXT: vst $vr1, $a1, 48
-; CHECK-NEXT: vst $vr4, $a1, 32
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr3, $a1, 0
+; CHECK-NEXT: vshuf4i.w $vr2, $vr0, 14
+; CHECK-NEXT: vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT: vshuf4i.w $vr3, $vr1, 14
+; CHECK-NEXT: vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT: vst $vr1, $a1, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <8 x i32>, ptr %ptr
@@ -350,11 +363,11 @@ define void @load_zext_16i8_to_16i16(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_16i8_to_16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr2, $vr1, $vr0
-; CHECK-NEXT: vilvh.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr2, $a1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -367,17 +380,21 @@ define void @load_zext_16i8_to_16i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_16i8_to_16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr2, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr3, $vr1, $vr2
-; CHECK-NEXT: vilvh.h $vr2, $vr1, $vr2
-; CHECK-NEXT: vilvh.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr4, $vr1, $vr0
-; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 48
-; CHECK-NEXT: vst $vr4, $a1, 32
-; CHECK-NEXT: vst $vr2, $a1, 16
-; CHECK-NEXT: vst $vr3, $a1, 0
+; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
+; CHECK-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT: vsllwil.hu.bu $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr0, 12
+; CHECK-NEXT: vsllwil.hu.bu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 32
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -390,29 +407,45 @@ define void @load_zext_16i8_to_16i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_zext_16i8_to_16i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr2, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr3, $vr1, $vr2
-; CHECK-NEXT: vilvl.w $vr4, $vr1, $vr3
-; CHECK-NEXT: vilvh.w $vr3, $vr1, $vr3
-; CHECK-NEXT: vilvh.h $vr2, $vr1, $vr2
-; CHECK-NEXT: vilvl.w $vr5, $vr1, $vr2
-; CHECK-NEXT: vilvh.w $vr2, $vr1, $vr2
-; CHECK-NEXT: vilvh.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr6, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr7, $vr1, $vr6
-; CHECK-NEXT: vilvh.w $vr6, $vr1, $vr6
-; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr8, $vr1, $vr0
-; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 112
-; CHECK-NEXT: vst $vr8, $a1, 96
-; CHECK-NEXT: vst $vr6, $a1, 80
-; CHECK-NEXT: vst $vr7, $a1, 64
-; CHECK-NEXT: vst $vr2, $a1, 48
-; CHECK-NEXT: vst $vr5, $a1, 32
-; CHECK-NEXT: vst $vr3, $a1, 16
-; CHECK-NEXT: vst $vr4, $a1, 0
+; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
+; CHECK-NEXT: vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr2, $vr0, 32
+; CHECK-NEXT: vsllwil.hu.bu $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT: vsrli.d $vr3, $vr0, 48
+; CHECK-NEXT: vsllwil.hu.bu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT: vbsrl.v $vr4, $vr0, 8
+; CHECK-NEXT: vsllwil.hu.bu $vr4, $vr4, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr4, $vr4, 0
+; CHECK-NEXT: vsllwil.du.wu $vr4, $vr4, 0
+; CHECK-NEXT: vbsrl.v $vr5, $vr0, 10
+; CHECK-NEXT: vsllwil.hu.bu $vr5, $vr5, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr5, $vr5, 0
+; CHECK-NEXT: vsllwil.du.wu $vr5, $vr5, 0
+; CHECK-NEXT: vbsrl.v $vr6, $vr0, 12
+; CHECK-NEXT: vsllwil.hu.bu $vr6, $vr6, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr6, $vr6, 0
+; CHECK-NEXT: vsllwil.du.wu $vr6, $vr6, 0
+; CHECK-NEXT: vbsrl.v $vr7, $vr0, 14
+; CHECK-NEXT: vsllwil.hu.bu $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.du.wu $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr7, $a1, 112
+; CHECK-NEXT: vst $vr6, $a1, 96
+; CHECK-NEXT: vst $vr5, $a1, 80
+; CHECK-NEXT: vst $vr4, $a1, 64
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 32
+; CHECK-NEXT: vst $vr1, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i8>, ptr %ptr
@@ -426,15 +459,16 @@ define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vld $vr1, $a0, 16
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vilvl.h $vr3, $vr2, $vr0
-; CHECK-NEXT: vilvh.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vilvl.h $vr4, $vr2, $vr1
-; CHECK-NEXT: vilvh.h $vr1, $vr2, $vr1
-; CHECK-NEXT: vst $vr1, $a1, 48
-; CHECK-NEXT: vst $vr4, $a1, 32
-; CHECK-NEXT: vst $vr0, $a1, 16
-; CHECK-NEXT: vst $vr3, $a1, 0
+; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT: vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr1, 8
+; CHECK-NEXT: vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT: vst $vr1, $a1, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr3, $a1, 48
+; CHECK-NEXT: vst $vr2, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
@@ -448,27 +482,36 @@ define void @load_zext_16i16_to_16i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vld $vr1, $a0, 16
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vilvl.h $vr3, $vr2, $vr0
-; CHECK-NEXT: vilvl.w $vr4, $vr2, $vr3
-; CHECK-NEXT: vilvh.w $vr3, $vr2, $vr3
-; CHECK-NEXT: vilvh.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vilvl.w $vr5, $vr2, $vr0
-; CHECK-NEXT: vilvh.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vilvl.h $vr6, $vr2, $vr1
-; CHECK-NEXT: vilvl.w $vr7, $vr2, $vr6
-; CHECK-NEXT: vilvh.w $vr6, $vr2, $vr6
-; CHECK-NEXT: vilvh.h $vr1, $vr2, $vr1
-; CHECK-NEXT: vilvl.w $vr8, $vr2, $vr1
-; CHECK-NEXT: vilvh.w $vr1, $vr2, $vr1
-; CHECK-NEXT: vst $vr1, $a1, 112
-; CHECK-NEXT: vst $vr8, $a1, 96
-; CHECK-NEXT: vst $vr6, $a1, 80
-; CHECK-NEXT: vst $vr7, $a1, 64
-; CHECK-NEXT: vst $vr0, $a1, 48
-; CHECK-NEXT: vst $vr5, $a1, 32
-; CHECK-NEXT: vst $vr3, $a1, 16
-; CHECK-NEXT: vst $vr4, $a1, 0
+; CHECK-NEXT: vshuf4i.h $vr2, $vr0, 14
+; CHECK-NEXT: vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT: vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr3, $vr0, 8
+; CHECK-NEXT: vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT: vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT: vbsrl.v $vr4, $vr0, 12
+; CHECK-NEXT: vsllwil.wu.hu $vr4, $vr4, 0
+; CHECK-NEXT: vsllwil.du.wu $vr4, $vr4, 0
+; CHECK-NEXT: vshuf4i.h $vr5, $vr1, 14
+; CHECK-NEXT: vsllwil.wu.hu $vr5, $vr5, 0
+; CHECK-NEXT: vsllwil.du.wu $vr5, $vr5, 0
+; CHECK-NEXT: vbsrl.v $vr6, $vr1, 8
+; CHECK-NEXT: vsllwil.wu.hu $vr6, $vr6, 0
+; CHECK-NEXT: vsllwil.du.wu $vr6, $vr6, 0
+; CHECK-NEXT: vbsrl.v $vr7, $vr1, 12
+; CHECK-NEXT: vsllwil.wu.hu $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.du.wu $vr7, $vr7, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT: vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT: vst $vr1, $a1, 64
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: vst $vr7, $a1, 112
+; CHECK-NEXT: vst $vr6, $a1, 96
+; CHECK-NEXT: vst $vr5, $a1, 80
+; CHECK-NEXT: vst $vr4, $a1, 48
+; CHECK-NEXT: vst $vr3, $a1, 32
+; CHECK-NEXT: vst $vr2, $a1, 16
; CHECK-NEXT: ret
entry:
%A = load <16 x i16>, ptr %ptr
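
The vmskcond.ll changes below apply the same idea to mask extraction: the sign-extending vsllwil forms now widen the compare result before vmskltz, replacing the vilvl + vslli sequences. A sketch of the assumed test shape (the IR bodies are elided by the diff context; icmp followed by a bitcast to an integer is the usual way such bitmask tests are written):

define i4 @vmsk_sgt_v4i8_sketch(<4 x i8> %a, <4 x i8> %b) {
  %cmp = icmp sgt <4 x i8> %a, %b            ; lane-wise signed compare
  %mask = bitcast <4 x i1> %cmp to i4        ; pack lanes into an i4 mask
  ret i4 %mask
}
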
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
index 8bdeebe..57b382a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
@@ -178,10 +178,9 @@ define i2 @vmsk_sgt_v2i8(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: vmsk_sgt_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -194,9 +193,8 @@ define i2 @vmsk_sgt_v2i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: vmsk_sgt_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -209,8 +207,7 @@ define i2 @vmsk_sgt_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vmsk_sgt_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -247,9 +244,8 @@ define i4 @vmsk_sgt_v4i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: vmsk_sgt_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -262,8 +258,7 @@ define i4 @vmsk_sgt_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: vmsk_sgt_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -300,8 +295,7 @@ define i8 @vmsk_sgt_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: vmsk_sgt_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -340,10 +334,9 @@ define i2 @vmsk_sgt_and_sgt_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8>
; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
; CHECK-NEXT: vslt.b $vr1, $vr3, $vr2
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -360,9 +353,8 @@ define i2 @vmsk_sgt_and_sgt_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0
; CHECK-NEXT: vslt.h $vr1, $vr3, $vr2
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -379,8 +371,7 @@ define i2 @vmsk_sgt_and_sgt_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0
; CHECK-NEXT: vslt.w $vr1, $vr3, $vr2
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -429,9 +420,8 @@ define i4 @vmsk_sgt_and_sgt_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
; CHECK-NEXT: vslt.b $vr1, $vr3, $vr2
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -448,8 +438,7 @@ define i4 @vmsk_sgt_and_sgt_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x
; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0
; CHECK-NEXT: vslt.h $vr1, $vr3, $vr2
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -498,8 +487,7 @@ define i8 @vmsk_sgt_and_sgt_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8>
; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
; CHECK-NEXT: vslt.b $vr1, $vr3, $vr2
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
@@ -594,9 +582,8 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) {
; CHECK-LABEL: vmsk_eq_allzeros_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vseqi.b $vr0, $vr0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret