author     WANG Rui <wangrui@loongson.cn>  2025-09-26 09:16:51 +0800
committer  WANG Rui <wangrui@loongson.cn>  2025-09-26 09:16:51 +0800
commit     ee29dd4f25dda7fcdff963053775279dd61d75ec (patch)
tree       60ad5970d8ebf3004be1bf00dc2d4560387df928
parent     aec52219a8b7c60e8d2dff2440b5c4c44596b377 (diff)
[LoongArch] Introduce instruction patterns for vector sign/zero extensions
This patch introduces legalization and instruction patterns for vector
sign and zero extension operations.
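
For context, here is a minimal IR reproducer of the kind of operation this patch targets (a sketch mirroring the regenerated tests below; the function name is illustrative):

define void @sext_v4i32_to_v4i64(ptr %ptr, ptr %dst) {
entry:
  ; Before this patch, LoongArch lowered this widening sext through
  ; per-element extract/insert or shuffle+shift sequences; with the new
  ; patterns a single widening instruction (vext2xv.d.w under LASX) is
  ; expected instead.
  %a = load <4 x i32>, ptr %ptr
  %b = sext <4 x i32> %a to <4 x i64>
  store <4 x i64> %b, ptr %dst
  ret void
}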
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArch.td                  |    1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp     |   13
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td     |   60
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td      |   26
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll            |  899
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll            | 1047
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll             |  486
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll |   36
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll             |  399
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll             |   57
10 files changed, 785 insertions(+), 2239 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 6497ff9..62e837a 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -59,6 +59,7 @@ def FeatureExtLSX
     : SubtargetFeature<"lsx", "HasExtLSX", "true",
                        "'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>;
 def HasExtLSX : Predicate<"Subtarget->hasExtLSX()">;
+def IsExtLSX : Predicate<"Subtarget->hasExtLSX() && !Subtarget->hasExtLASX()">;
 
 // Loongson Advanced SIMD eXtension (LASX)
 def FeatureExtLASX
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 801e557..104b315 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -385,6 +385,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
     }
+    for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16}) {
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+    }
   }
 
   // Set operations for 'LASX' feature.
@@ -446,6 +450,15 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                          VT, Expand);
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     }
+    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
+      setOperationAction(ISD::SIGN_EXTEND, VT, Legal);
+      setOperationAction(ISD::ZERO_EXTEND, VT, Legal);
+    }
+    for (MVT VT :
+         {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+    }
   }
 
   // Set DAG combine for LA32 and LA64.
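The *_VECTOR_INREG nodes marked Legal above extend only the low elements of a vector into wider lanes; keeping them legal stops the legalizer from expanding them, so the TableGen patterns below can select them directly. A hedged sketch of IR that typically becomes such a node (this assumes the usual extract-low-half-plus-extend DAG combine; the names are illustrative):

define <2 x i64> @sext_low_half(<4 x i32> %v) {
  ; A sext of the low two lanes of a v4i32 is normally combined into a
  ; single SIGN_EXTEND_VECTOR_INREG node during DAG combining.
  %lo = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  %e = sext <2 x i32> %lo to <2 x i64>
  ret <2 x i64> %e
}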
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990..b338946 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2063,6 +2063,66 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
 
+// Sign extensions
+def : Pat<(v4i64 (sext v4i32:$vj)),
+          (v4i64 (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (sext v8i16:$vj)),
+          (v8i32 (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (sext v16i8:$vj)),
+          (v16i16 (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i64 (sext_invec v32i8:$xj)), (v4i64 (VEXT2XV_D_B v32i8:$xj))>;
+def : Pat<(v4i64 (sext_invec v16i16:$xj)), (v4i64 (VEXT2XV_D_H v16i16:$xj))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)),
+          (v8i16 (EXTRACT_SUBREG (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v8i32 (sext_invec v32i8:$xj)), (v8i32 (VEXT2XV_W_B v32i8:$xj))>;
+
+// Zero extensions
+def : Pat<(v4i64 (zext v4i32:$vj)),
+          (v4i64 (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (zext v8i16:$vj)),
+          (v8i32 (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (zext v16i8:$vj)),
+          (v16i16 (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i64 (zext_invec v32i8:$xj)), (v4i64 (VEXT2XV_DU_BU v32i8:$xj))>;
+def : Pat<(v4i64 (zext_invec v16i16:$xj)), (v4i64 (VEXT2XV_DU_HU v16i16:$xj))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)),
+          (v8i16 (EXTRACT_SUBREG (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v8i32 (zext_invec v32i8:$xj)), (v8i32 (VEXT2XV_WU_BU v32i8:$xj))>;
+
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
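As a concrete illustration of the full-width LASX patterns above (a sketch; the function name is made up, but the selected instruction matches the regenerated tests below), something like

define <4 x i64> @zext_v4i32(<4 x i32> %v) {
  ; Matches the (v4i64 (zext v4i32:$vj)) pattern: the 128-bit source sits
  ; in the low half of an LASX register and is widened by one instruction.
  %e = zext <4 x i32> %v to <4 x i64>
  ret <4 x i64> %e
}

compiled with llc -mtriple=loongarch64 -mattr=+lasx is expected to produce a single vext2xv.du.wu in place of the old per-element extract/insert sequence.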
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index b0eb51a..eb1fe93 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2174,6 +2174,32 @@ def : Pat<(loongarch_vmsknez (v16i8 LSX128:$vj)), (PseudoVMSKNEZ_B LSX128:$vj)>;
 
 } // Predicates = [HasExtLSX]
 
+let Predicates = [IsExtLSX] in {
+
+// Sign extensions
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+          (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+          (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)), (v2i64 (VSLLWIL_D_W v4i32:$vj, 0))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+          (v4i32 (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)), (v4i32 (VSLLWIL_W_H v8i16:$vj, 0))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)), (v8i16 (VSLLWIL_H_B v16i8:$vj, 0))>;
+
+// Zero extensions
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+          (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+          (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)), (v2i64 (VSLLWIL_DU_WU v4i32:$vj, 0))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+          (v4i32 (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)), (v4i32 (VSLLWIL_WU_HU v8i16:$vj, 0))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)), (v8i16 (VSLLWIL_HU_BU v16i8:$vj, 0))>;
+
+} // Predicates = [IsExtLSX]
+
 /// Intrinsic pattern
 
 class deriveLSXIntrinsic<string Inst> {
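These LSX-only patterns are guarded by IsExtLSX so that, when LASX is available, the vext2xv forms above win instead. They synthesize the in-register extension from vsllwil with a shift amount of 0, widening the low half one element size per step. A hedged IR sketch of what the single-step pattern matches (illustrative names):

define <8 x i16> @sext_low_v16i8(<16 x i8> %v) {
  ; With +lsx but without +lasx, extending the low eight bytes should
  ; select a single vsllwil.h.b with immediate 0, per the patterns above.
  %lo = shufflevector <16 x i8> %v, <16 x i8> poison,
                      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %e = sext <8 x i8> %lo to <8 x i16>
  ret <8 x i16> %e
}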
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
index 953e6c4..8884aac 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
@@ -7,11 +7,7 @@ define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.h $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -26,10 +22,7 @@ define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vext2xv.d.h $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -45,9 +38,8 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a2, $a0, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vslli.d $vr0, $vr0, 32
-; LA32-NEXT: vsrai.d $vr0, $vr0, 32
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.w $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -55,9 +47,7 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
-; LA64-NEXT: vslli.d $vr0, $vr0, 32
-; LA64-NEXT: vsrai.d $vr0, $vr0, 32
+; LA64-NEXT: vext2xv.d.w $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -72,10 +62,7 @@ define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vext2xv.w.b $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -89,13 +76,8 @@ define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_sext_4i8_to_4i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
-; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; CHECK-NEXT: xvslli.d $xr0, $xr0, 56
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 56
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
 ; CHECK-NEXT: xvst $xr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -112,9 +94,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.w $vr0, $vr0, 16
-; LA32-NEXT: vsrai.w $vr0, $vr0, 16
+; LA32-NEXT: vext2xv.w.h $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -122,9 +102,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.w $vr0, $vr0, 16
-; LA64-NEXT: vsrai.w $vr0, $vr0, 16
+; LA64-NEXT: vext2xv.w.h $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -139,27 +117,17 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI6_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA32-NEXT: xvslli.d $xr0, $xr0, 48
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.h $xr0, $xr0
 ; LA32-NEXT: xvst $xr0, $a1, 0
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: load_sext_4i16_to_4i64:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI6_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA64-NEXT: xvslli.d $xr0, $xr0, 48
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.d.h $xr0, $xr0
 ; LA64-NEXT: xvst $xr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -170,43 +138,12 @@ entry:
 }
 
 define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_4i32_to_4i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vextrins.w $vr1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vextrins.w $vr1, $vr0, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr2, $vr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr0, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_4i32_to_4i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_4i32_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
 entry:
   %A = load <4 x i32>, ptr %ptr
   %B = sext <4 x i32> %A to <4 x i64>
@@ -221,9 +158,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.h $vr0, $vr0, 8
-; LA32-NEXT: vsrai.h $vr0, $vr0, 8
+; LA32-NEXT: vext2xv.h.b $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -231,9 +166,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.h $vr0, $vr0, 8
-; LA64-NEXT: vsrai.h $vr0, $vr0, 8
+; LA64-NEXT: vext2xv.h.b $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -248,27 +181,17 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI9_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI9_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA32-NEXT: xvslli.w $xr0, $xr0, 24
-; LA32-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.w.b $xr0, $xr0
 ; LA32-NEXT: xvst $xr0, $a1, 0
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: load_sext_8i8_to_8i32:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI9_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI9_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.w $xr0, $xr0, 24
-; LA64-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.w.b $xr0, $xr0
 ; LA64-NEXT: xvst $xr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -282,21 +205,13 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
 ; LA32-LABEL: load_sext_8i8_to_8i64:
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: xvpermi.d $xr1, $xr0, 68
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vext2xv.d.b $xr1, $xr0
 ; LA32-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
-; LA32-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA32-NEXT: xvld $xr2, $a2, %pc_lo12(.LCPI10_0)
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr0, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr2
-; LA32-NEXT: xvslli.d $xr0, $xr0, 56
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 56
-; LA32-NEXT: xvshuf.b $xr1, $xr0, $xr1, $xr2
-; LA32-NEXT: xvslli.d $xr1, $xr1, 56
-; LA32-NEXT: xvsrai.d $xr1, $xr1, 56
+; LA32-NEXT: vext2xv.d.b $xr0, $xr0
 ; LA32-NEXT: xvst $xr1, $a1, 0
 ; LA32-NEXT: xvst $xr0, $a1, 32
 ; LA32-NEXT: ret
@@ -304,20 +219,12 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
 ; LA64-LABEL: load_sext_8i8_to_8i64:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI10_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vsrli.d $vr2, $vr1, 32
-; LA64-NEXT: xvpermi.d $xr2, $xr2, 68
-; LA64-NEXT: xvshuf.b $xr2, $xr0, $xr2, $xr0
-; LA64-NEXT: xvslli.d $xr2, $xr2, 56
-; LA64-NEXT: xvsrai.d $xr2, $xr2, 56
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.d $xr0, $xr0, 56
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 56
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vsrli.d $vr1, $vr0, 32
+; LA64-NEXT: vext2xv.d.b $xr1, $xr1
+; LA64-NEXT: vext2xv.d.b $xr0, $xr0
 ; LA64-NEXT: xvst $xr0, $a1, 0
-; LA64-NEXT: xvst $xr2, $a1, 32
+; LA64-NEXT: xvst $xr1, $a1, 32
 ; LA64-NEXT: ret
 entry:
   %A = load <8 x i8>, ptr %ptr
@@ -330,32 +237,8 @@ define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_sext_8i16_to_8i32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
   %A = load <8 x i16>, ptr %ptr
@@ -365,93 +248,16 @@ entry:
 }
 
 define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_8i16_to_8i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr0, 3
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr0, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr0, 1
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr0, 7
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr0, 4
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr0, 5
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr3, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; LA32-NEXT: xvpermi.q $xr3, $xr1, 2
-; LA32-NEXT: xvst $xr3, $a1, 32
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_8i16_to_8i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr1, 2
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_8i16_to_8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
+; CHECK-NEXT: ret
 entry:
   %A = load <8 x i16>, ptr %ptr
   %B = sext <8 x i16> %A to <8 x i64>
@@ -460,73 +266,15 @@ entry:
 }
 
 define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_8i32_to_8i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a0, 0
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr1, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr1, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr3, $vr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; LA32-NEXT: vextrins.w $vr3, $vr1, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; LA32-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA32-NEXT: vextrins.w $vr1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vextrins.w $vr1, $vr0, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr2, $vr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr0, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: xvst $xr3, $a1, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_8i32_to_8i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 2
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 3
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr1, 1
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_8i32_to_8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
+; CHECK-NEXT: ret
 entry:
   %A = load <8 x i32>, ptr %ptr
   %B = sext <8 x i32> %A to <8 x i64>
@@ -538,56 +286,8 @@ define void @load_sext_16i8_to_16i16(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_sext_16i8_to_16i16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
   %A = load <16 x i8>, ptr %ptr
@@ -600,58 +300,12 @@ define void @load_sext_16i8_to_16i32(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_sext_16i8_to_16i32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: ext.w.b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr3, $xr1, 2
-; CHECK-NEXT: xvst $xr3, $a1, 32
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
 ; CHECK-NEXT: ret
 entry:
   %A = load <16 x i8>, ptr %ptr
@@ -661,171 +315,24 @@ entry:
 }
 
 define void @load_sext_16i8_to_16i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_16i8_to_16i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr1, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 2
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 3
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 0
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 1
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 6
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 7
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 4
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 5
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr3, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 10
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 11
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 8
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 9
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 3
-; LA32-NEXT: xvpermi.q $xr4, $xr3, 2
-; LA32-NEXT: vpickve2gr.b $a0, $vr1, 14
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.b $a2, $vr1, 15
-; LA32-NEXT: ext.w.b $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.b $a3, $vr1, 12
-; LA32-NEXT: ext.w.b $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 0
-; LA32-NEXT: vpickve2gr.b $a4, $vr1, 13
-; LA32-NEXT: ext.w.b $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 3
-; LA32-NEXT: xvpermi.q $xr5, $xr3, 2
-; LA32-NEXT: xvst $xr5, $a1, 96
-; LA32-NEXT: xvst $xr4, $a1, 64
-; LA32-NEXT: xvst $xr2, $a1, 32
-; LA32-NEXT: xvst $xr0, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_16i8_to_16i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 10
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 11
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 8
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 9
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
-; LA64-NEXT: xvpermi.q $xr4, $xr2, 2
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 14
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 15
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 12
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 13
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
-; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
-; LA64-NEXT: xvst $xr5, $a1, 96
-; LA64-NEXT: xvst $xr4, $a1, 64
-; LA64-NEXT: xvst $xr3, $a1, 32
-; LA64-NEXT: xvst $xr1, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_16i8_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1
+; CHECK-NEXT: vext2xv.d.w $xr2, $xr2
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr3, $xr3
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 64
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: xvst $xr2, $a1, 96
+; CHECK-NEXT: ret
 entry:
   %A = load <16 x i8>, ptr %ptr
   %B = sext <16 x i8> %A to <16 x i64>
@@ -838,58 +345,10 @@ define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: xvld $xr0, $a0, 0
 ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
-; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 32
 ; CHECK-NEXT: ret
 entry:
   %A = load <16 x i16>, ptr %ptr
@@ -899,173 +358,23 @@ entry:
 }
 
 define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_16i16_to_16i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr1, $a0, 0
-; LA32-NEXT: xvpermi.q $xr3, $xr1, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 2
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr3, 3
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr3, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr3, 1
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr3, 6
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr3, 7
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr3, 4
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr3, 5
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr4, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 2
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr1, 3
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr1, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr1, 1
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 3
-; LA32-NEXT: xvpermi.q $xr4, $xr3, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 6
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
-; LA32-NEXT: srai.w $a2, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
-; LA32-NEXT: vpickve2gr.h $a2, $vr1, 7
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
-; LA32-NEXT: srai.w $a3, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
-; LA32-NEXT: vpickve2gr.h $a3, $vr1, 4
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 0
-; LA32-NEXT: vpickve2gr.h $a4, $vr1, 5
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 1
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 1
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 2
-; LA32-NEXT: vinsgr2vr.w $vr5, $a2, 3
-; LA32-NEXT: srai.w $a0, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 3
-; LA32-NEXT: xvpermi.q $xr5, $xr3, 2
-; LA32-NEXT: xvst $xr5, $a1, 32
-; LA32-NEXT: xvst $xr4, $a1, 0
-; LA32-NEXT: xvst $xr2, $a1, 96
-; LA32-NEXT: xvst $xr0, $a1, 64
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_16i16_to_16i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 2
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 3
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 0
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr3, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 6
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 7
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 4
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr2, 5
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
-; LA64-NEXT: xvpermi.q $xr4, $xr3, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
-; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
-; LA64-NEXT: xvst $xr5, $a1, 32
-; LA64-NEXT: xvst $xr3, $a1, 0
-; LA64-NEXT: xvst $xr4, $a1, 96
-; LA64-NEXT: xvst $xr1, $a1, 64
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_16i16_to_16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vext2xv.w.h $xr1, $xr1
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1
+; CHECK-NEXT: vext2xv.d.w $xr2, $xr2
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1
+; CHECK-NEXT: vext2xv.d.w $xr3, $xr3
+; CHECK-NEXT: vext2xv.d.w $xr1, $xr1
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr1, $a1, 64
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: xvst $xr2, $a1, 96
+; CHECK-NEXT: ret
 entry:
   %A = load <16 x i16>, ptr %ptr
   %B = sext <16 x i16> %A to <16 x i64>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
index f0548cc..5269202 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
@@ -7,10 +7,7 @@ define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.h $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -25,9 +22,7 @@ define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -42,9 +37,9 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vrepli.b $vr0, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.du.wu $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -52,8 +47,7 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT: vext2xv.du.wu $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -68,9 +62,7 @@ define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -85,24 +77,8 @@ define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: xvrepli.b $xr1, 0
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18
-; CHECK-NEXT: xvextrins.b $xr1, $xr2, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18
-; CHECK-NEXT: xvextrins.b $xr1, $xr2, 136
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48
-; CHECK-NEXT: xvextrins.b $xr1, $xr2, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
-; CHECK-NEXT: xvextrins.b $xr1, $xr0, 136
-; CHECK-NEXT: xvst $xr1, $a1, 0
+; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
   %A = load <4 x i8>, ptr %ptr
@@ -118,8 +94,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT: vext2xv.wu.hu $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -127,8 +102,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT: vext2xv.wu.hu $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -145,48 +119,16 @@ define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.h $xr1, $xr2, 68
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA32-NEXT: xvreplgr2vr.h $xr0, $a0
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.h $xr1, $xr0, 68
-; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: vext2xv.du.hu $xr0, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: load_zext_4i16_to_4i64:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA64-NEXT: xvrepli.b $xr1, 0
-; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.h $xr1, $xr2, 68
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.h $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA64-NEXT: xvreplgr2vr.h $xr0, $a0
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA64-NEXT: xvextrins.h $xr1, $xr0, 68
-; LA64-NEXT: xvst $xr1, $a1, 0
+; LA64-NEXT: vext2xv.du.hu $xr0, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
   %A = load <4 x i16>, ptr %ptr
@@ -196,39 +138,12 @@ entry:
 }
 
 define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_4i32_to_4i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6
-; LA32-NEXT: xvst $xr1, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_4i32_to_4i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_zext_4i32_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
 entry:
   %A = load <4 x i32>, ptr %ptr
   %B = zext <4 x i32> %A to <4 x i64>
@@ -243,8 +158,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vrepli.b $vr1, 0
-; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT: vext2xv.hu.bu $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -252,8 +166,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vrepli.b $vr1, 0
-; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT: vext2xv.hu.bu $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -270,80 +183,16 @@ define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 204
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 4
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 5
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 6
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 7
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr0, 204
-; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: vext2xv.wu.bu $xr0, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: load_zext_8i8_to_8i32:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA64-NEXT: xvrepli.b $xr1, 0
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 204
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 68
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6
-; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr2, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7
-; LA64-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA64-NEXT: xvextrins.b $xr1, $xr0, 204
-; LA64-NEXT: xvst $xr1, $a1, 0
+; LA64-NEXT: vext2xv.wu.bu $xr0, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
   %A = load <8 x i8>, ptr %ptr
@@ -356,46 +205,15 @@ define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
 ; LA32-LABEL: load_zext_8i8_to_8i64:
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: vpickve2gr.b $a2, $vr0, 0
-; LA32-NEXT: vpickve2gr.b $a3, $vr0, 1
-; LA32-NEXT: vpickve2gr.b $a4, $vr0, 2
-; LA32-NEXT: vpickve2gr.b $a5, $vr0, 3
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vext2xv.du.bu $xr1, $xr0
+; LA32-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvori.b $xr3, $xr1, 0
-; LA32-NEXT: xvextrins.b $xr3, $xr2, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA32-NEXT: xvreplgr2vr.b $xr4, $a0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA32-NEXT: xvpermi.q $xr2, $xr3, 18
-; LA32-NEXT: xvextrins.b $xr3, $xr2, 136
-; LA32-NEXT: xvreplgr2vr.b $xr2, $a2
-; LA32-NEXT: xvpermi.q $xr4, $xr3, 48
-; LA32-NEXT: xvextrins.b $xr3, $xr4, 0
-; LA32-NEXT: xvreplgr2vr.b $xr4, $a3
-; LA32-NEXT: xvpermi.q $xr0, $xr3, 48
-; LA32-NEXT: xvextrins.b $xr3, $xr0, 136
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
-; LA32-NEXT: xvpermi.q $xr4, $xr1, 18
-; LA32-NEXT: xvextrins.b $xr1, $xr4, 136
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a4
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr0, 0
-; LA32-NEXT: xvreplgr2vr.b $xr0, $a5
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
-; LA32-NEXT: xvextrins.b $xr1, $xr0, 136
+; LA32-NEXT: vext2xv.du.bu $xr0, $xr0
 ; LA32-NEXT: xvst $xr1, $a1, 0
-; LA32-NEXT: xvst $xr3, $a1, 32
+; LA32-NEXT: xvst $xr0, $a1, 32
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: load_zext_8i8_to_8i64:
@@ -403,42 +221,10 @@ define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
 ; LA64-NEXT: vsrli.d $vr1, $vr0, 32
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 0
-; LA64-NEXT: xvrepli.b $xr2, 0
-; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
-; LA64-NEXT: xvpermi.q $xr3, $xr2, 18
-; LA64-NEXT: xvori.b $xr4, $xr2, 0
-; LA64-NEXT: xvextrins.b $xr4, $xr3, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 1
-; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
-; LA64-NEXT: xvpermi.q $xr3, $xr4, 18
-; LA64-NEXT: xvextrins.b $xr4, $xr3, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 2
-; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
-; LA64-NEXT: xvpermi.q $xr3, $xr4, 48
-; LA64-NEXT: xvextrins.b $xr4, $xr3, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr1, 3
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr4, 48
-; LA64-NEXT: xvextrins.b $xr4, $xr1, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 18
-; LA64-NEXT: xvextrins.b $xr2, $xr1, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 18
-; LA64-NEXT: xvextrins.b $xr2, $xr1, 136
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
-; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 48
-; LA64-NEXT: xvextrins.b $xr2, $xr1, 0
-; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
-; LA64-NEXT: xvreplgr2vr.b $xr0, $a0
-; LA64-NEXT: xvpermi.q $xr0, $xr2, 48
-; LA64-NEXT: xvextrins.b $xr2, $xr0, 136
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: xvst $xr4, $a1, 32
+; LA64-NEXT: vext2xv.du.bu $xr1, $xr1
+; LA64-NEXT: vext2xv.du.bu $xr0, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: xvst $xr1, $a1, 32
 ; LA64-NEXT: ret
 entry:
   %A = load <8 x i8>, ptr %ptr
@@ -448,67 +234,12 @@ entry:
 }
 
 define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_8i16_to_8i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_zext_8i16_to_8i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.h
$a0, $vr0, 6 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; LA64-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA64-NEXT: xvst $xr2, $a1, 0 -; LA64-NEXT: ret +; CHECK-LABEL: load_zext_8i16_to_8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr %B = zext <8 x i16> %A to <8 x i32> @@ -517,79 +248,16 @@ entry: } define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_zext_8i16_to_8i64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: vld $vr0, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0 -; LA32-NEXT: xvrepli.b $xr1, 0 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvori.b $xr2, $xr1, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6 -; LA32-NEXT: xvst $xr1, $a1, 32 -; LA32-NEXT: xvst $xr2, $a1, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: load_zext_8i16_to_8i64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: vld $vr0, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: 
bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; LA64-NEXT: xvpermi.q $xr3, $xr1, 2 -; LA64-NEXT: xvst $xr3, $a1, 32 -; LA64-NEXT: xvst $xr2, $a1, 0 -; LA64-NEXT: ret +; CHECK-LABEL: load_zext_8i16_to_8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr1, $a1, 32 +; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr %B = zext <8 x i16> %A to <8 x i64> @@ -598,65 +266,15 @@ entry: } define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_zext_8i32_to_8i64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a0, 0 -; LA32-NEXT: xvpermi.q $xr1, $xr0, 1 -; LA32-NEXT: xvrepli.b $xr2, 0 -; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0 -; LA32-NEXT: xvori.b $xr3, $xr2, 0 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0 -; LA32-NEXT: vpickve2gr.w $a0, $vr1, 1 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2 -; LA32-NEXT: vpickve2gr.w $a0, $vr1, 2 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4 -; LA32-NEXT: vpickve2gr.w $a0, $vr1, 3 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6 -; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0 -; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2 -; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4 -; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6 -; LA32-NEXT: xvst $xr2, $a1, 0 -; LA32-NEXT: xvst $xr3, $a1, 32 -; LA32-NEXT: ret -; -; LA64-LABEL: load_zext_8i32_to_8i64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a0, 0 -; LA64-NEXT: xvpermi.q $xr1, $xr0, 1 -; LA64-NEXT: vpickve2gr.w $a0, $vr1, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.w $a0, $vr1, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.w $a0, $vr1, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; LA64-NEXT: vpickve2gr.w $a0, $vr1, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; LA64-NEXT: xvpermi.q $xr3, $xr2, 2 -; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA64-NEXT: xvst $xr2, $a1, 0 -; LA64-NEXT: xvst $xr3, $a1, 32 -; LA64-NEXT: ret +; CHECK-LABEL: load_zext_8i32_to_8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr1, $a1, 32 +; CHECK-NEXT: ret entry: %A = load <8 x i32>, ptr %ptr %B = zext <8 x i32> %A to <8 x i64> @@ -668,56 +286,8 @@ define void @load_zext_16i8_to_16i16(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_16i8_to_16i16: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7 -; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2 -; CHECK-NEXT: xvst $xr2, $a1, 0 +; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -730,58 +300,12 @@ define void @load_zext_16i8_to_16i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_16i8_to_16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: 
vinsgr2vr.w $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3 -; CHECK-NEXT: xvpermi.q $xr3, $xr1, 2 -; CHECK-NEXT: xvst $xr3, $a1, 32 -; CHECK-NEXT: xvst $xr2, $a1, 0 +; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1 +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr1, $a1, 32 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -791,143 +315,24 @@ entry: } define void @load_zext_16i8_to_16i64(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_zext_16i8_to_16i64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: vld $vr1, $a0, 0 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 0 -; LA32-NEXT: xvrepli.b $xr2, 0 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvori.b $xr0, $xr2, 0 -; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 0 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 1 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 2 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 2 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 4 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 3 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 6 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 4 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvori.b $xr3, $xr2, 0 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 5 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 6 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 7 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 8 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvori.b $xr4, $xr2, 0 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 0 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 9 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 2 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 10 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 4 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 11 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 6 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 12 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 13 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 14 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4 -; LA32-NEXT: vpickve2gr.b $a0, $vr1, 15 -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6 -; LA32-NEXT: xvst $xr2, $a1, 96 -; LA32-NEXT: xvst $xr4, $a1, 64 -; LA32-NEXT: xvst $xr3, $a1, 32 -; LA32-NEXT: xvst $xr0, $a1, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: load_zext_16i8_to_16i64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: vld $vr0, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: 
vpickve2gr.b $a0, $vr0, 3 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; LA64-NEXT: xvpermi.q $xr1, $xr2, 2 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; LA64-NEXT: xvpermi.q $xr3, $xr2, 2 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 10 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 11 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 8 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 9 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1 -; LA64-NEXT: xvpermi.q $xr4, $xr2, 2 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 14 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 15 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 12 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0 -; LA64-NEXT: vpickve2gr.b $a0, $vr0, 13 -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1 -; LA64-NEXT: xvpermi.q $xr5, $xr2, 2 -; LA64-NEXT: xvst $xr5, $a1, 96 -; LA64-NEXT: xvst $xr4, $a1, 64 -; LA64-NEXT: xvst $xr3, $a1, 32 -; LA64-NEXT: xvst $xr1, $a1, 0 -; LA64-NEXT: ret +; CHECK-LABEL: load_zext_16i8_to_16i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1 +; CHECK-NEXT: vext2xv.du.wu $xr2, $xr2 +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1 +; CHECK-NEXT: vext2xv.du.wu $xr3, $xr3 +; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr1, $a1, 64 +; CHECK-NEXT: xvst $xr3, $a1, 32 +; CHECK-NEXT: xvst $xr2, $a1, 96 +; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr %B = zext <16 x i8> %A to <16 x i64> @@ -936,121 +341,15 @@ entry: } define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_zext_16i16_to_16i32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a0, 0 -; LA32-NEXT: xvpermi.q $xr1, $xr0, 1 -; 
LA32-NEXT: vpickve2gr.h $a0, $vr1, 4 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr1, 5 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; LA32-NEXT: vpickve2gr.h $a0, $vr1, 6 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr1, 7 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; LA32-NEXT: vpickve2gr.h $a0, $vr1, 0 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr1, 1 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 1 -; LA32-NEXT: vpickve2gr.h $a0, $vr1, 2 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr1, 3 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3 -; LA32-NEXT: xvpermi.q $xr3, $xr2, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; LA32-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA32-NEXT: xvst $xr2, $a1, 0 -; LA32-NEXT: xvst $xr3, $a1, 32 -; LA32-NEXT: ret -; -; LA64-LABEL: load_zext_16i16_to_16i32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a0, 0 -; LA64-NEXT: xvpermi.q $xr1, $xr0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 4 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 5 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 6 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 7 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr1, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 3 -; LA64-NEXT: xvpermi.q $xr3, $xr2, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: 
vinsgr2vr.w $vr1, $a0, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; LA64-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA64-NEXT: xvst $xr2, $a1, 0 -; LA64-NEXT: xvst $xr3, $a1, 32 -; LA64-NEXT: ret +; CHECK-LABEL: load_zext_16i16_to_16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1 +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr1, $a1, 32 +; CHECK-NEXT: ret entry: %A = load <16 x i16>, ptr %ptr %B = zext <16 x i16> %A to <16 x i32> @@ -1059,145 +358,23 @@ entry: } define void @load_zext_16i16_to_16i64(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_zext_16i16_to_16i64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a0, 0 -; LA32-NEXT: xvpermi.q $xr3, $xr0, 1 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 0 -; LA32-NEXT: xvrepli.b $xr2, 0 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvori.b $xr1, $xr2, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 1 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 2 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 3 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 4 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvori.b $xr4, $xr2, 0 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 5 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 6 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 4 -; LA32-NEXT: vpickve2gr.h $a0, $vr3, 7 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 6 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvori.b $xr3, $xr2, 0 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4 -; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6 -; LA32-NEXT: xvst $xr2, $a1, 32 -; LA32-NEXT: xvst $xr3, $a1, 0 -; LA32-NEXT: xvst $xr4, $a1, 96 -; LA32-NEXT: xvst 
$xr1, $a1, 64 -; LA32-NEXT: ret -; -; LA64-LABEL: load_zext_16i16_to_16i64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a0, 0 -; LA64-NEXT: xvpermi.q $xr2, $xr0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; LA64-NEXT: xvpermi.q $xr1, $xr3, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 6 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 7 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 4 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr2, 5 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1 -; LA64-NEXT: xvpermi.q $xr4, $xr3, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; LA64-NEXT: xvpermi.q $xr3, $xr2, 2 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0 -; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1 -; LA64-NEXT: xvpermi.q $xr5, $xr2, 2 -; LA64-NEXT: xvst $xr5, $a1, 32 -; LA64-NEXT: xvst $xr3, $a1, 0 -; LA64-NEXT: xvst $xr4, $a1, 96 -; LA64-NEXT: xvst $xr1, $a1, 64 -; LA64-NEXT: ret +; CHECK-LABEL: load_zext_16i16_to_16i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr1 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 1 +; CHECK-NEXT: vext2xv.du.wu $xr2, $xr2 +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 1 +; CHECK-NEXT: vext2xv.du.wu $xr3, $xr3 +; CHECK-NEXT: vext2xv.du.wu $xr1, $xr1 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; 
CHECK-NEXT: xvst $xr1, $a1, 64 +; CHECK-NEXT: xvst $xr3, $a1, 32 +; CHECK-NEXT: xvst $xr2, $a1, 96 +; CHECK-NEXT: ret entry: %A = load <16 x i16>, ptr %ptr %B = zext <16 x i16> %A to <16 x i64> diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll index cadaf2f..c78de80 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll @@ -7,11 +7,9 @@ define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr0, $vr0, 56 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 +; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -26,10 +24,8 @@ define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr0, $vr0, 48 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -45,9 +41,8 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) { ; LA32-NEXT: ld.w $a2, $a0, 0 ; LA32-NEXT: ld.w $a0, $a0, 4 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; LA32-NEXT: vslli.d $vr0, $vr0, 32 -; LA32-NEXT: vsrai.d $vr0, $vr0, 32 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vsllwil.d.w $vr0, $vr0, 0 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret ; @@ -55,9 +50,7 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16 -; LA64-NEXT: vslli.d $vr0, $vr0, 32 -; LA64-NEXT: vsrai.d $vr0, $vr0, 32 +; LA64-NEXT: vsllwil.d.w $vr0, $vr0, 0 ; LA64-NEXT: vst $vr0, $a1, 0 ; LA64-NEXT: ret entry: @@ -72,10 +65,8 @@ define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr0, $vr0, 24 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 +; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -90,16 +81,15 @@ define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr1, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr1, $vr1, 56 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 56 -; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr0, $vr0, 56 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 +; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; CHECK-NEXT: 
vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <4 x i8>, ptr %ptr @@ -115,9 +105,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) { ; LA32-NEXT: ld.w $a0, $a0, 4 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0 -; LA32-NEXT: vslli.w $vr0, $vr0, 16 -; LA32-NEXT: vsrai.w $vr0, $vr0, 16 +; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret ; @@ -125,9 +113,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0 -; LA64-NEXT: vslli.w $vr0, $vr0, 16 -; LA64-NEXT: vsrai.w $vr0, $vr0, 16 +; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0 ; LA64-NEXT: vst $vr0, $a1, 0 ; LA64-NEXT: ret entry: @@ -141,17 +127,13 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) { ; LA32-LABEL: load_sext_4i16_to_4i64: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ld.w $a2, $a0, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 ; LA32-NEXT: ld.w $a0, $a0, 4 -; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0 -; LA32-NEXT: vilvl.w $vr0, $vr0, $vr0 -; LA32-NEXT: vslli.d $vr0, $vr0, 48 -; LA32-NEXT: vsrai.d $vr0, $vr0, 48 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; LA32-NEXT: vsllwil.d.w $vr0, $vr0, 0 ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; LA32-NEXT: vilvl.h $vr1, $vr1, $vr1 -; LA32-NEXT: vilvl.w $vr1, $vr1, $vr1 -; LA32-NEXT: vslli.d $vr1, $vr1, 48 -; LA32-NEXT: vsrai.d $vr1, $vr1, 48 +; LA32-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; LA32-NEXT: vsllwil.d.w $vr1, $vr1, 0 ; LA32-NEXT: vst $vr1, $a1, 16 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret @@ -160,15 +142,13 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0 -; LA64-NEXT: vilvl.w $vr1, $vr0, $vr0 -; LA64-NEXT: vslli.d $vr1, $vr1, 48 -; LA64-NEXT: vsrai.d $vr1, $vr1, 48 -; LA64-NEXT: vilvh.w $vr0, $vr0, $vr0 -; LA64-NEXT: vslli.d $vr0, $vr0, 48 -; LA64-NEXT: vsrai.d $vr0, $vr0, 48 -; LA64-NEXT: vst $vr0, $a1, 16 -; LA64-NEXT: vst $vr1, $a1, 0 +; LA64-NEXT: vshuf4i.h $vr1, $vr0, 14 +; LA64-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; LA64-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; LA64-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: vst $vr1, $a1, 16 ; LA64-NEXT: ret entry: %A = load <4 x i16>, ptr %ptr @@ -181,14 +161,11 @@ define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_4i32_to_4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 16 -; CHECK-NEXT: vslli.d $vr1, $vr1, 32 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 32 -; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 50 -; CHECK-NEXT: vslli.d $vr0, $vr0, 32 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 +; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <4 x i32>, ptr %ptr @@ -204,9 +181,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) { ; LA32-NEXT: ld.w $a0, $a0, 4 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA32-NEXT: vslli.h $vr0, $vr0, 8 -; LA32-NEXT: vsrai.h $vr0, 
$vr0, 8 +; LA32-NEXT: vsllwil.h.b $vr0, $vr0, 0 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret ; @@ -214,9 +189,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA64-NEXT: vslli.h $vr0, $vr0, 8 -; LA64-NEXT: vsrai.h $vr0, $vr0, 8 +; LA64-NEXT: vsllwil.h.b $vr0, $vr0, 0 ; LA64-NEXT: vst $vr0, $a1, 0 ; LA64-NEXT: ret entry: @@ -230,17 +203,13 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) { ; LA32-LABEL: load_sext_8i8_to_8i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ld.w $a2, $a0, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 ; LA32-NEXT: ld.w $a0, $a0, 4 -; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0 -; LA32-NEXT: vslli.w $vr0, $vr0, 24 -; LA32-NEXT: vsrai.w $vr0, $vr0, 24 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0 ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; LA32-NEXT: vilvl.b $vr1, $vr1, $vr1 -; LA32-NEXT: vilvl.h $vr1, $vr1, $vr1 -; LA32-NEXT: vslli.w $vr1, $vr1, 24 -; LA32-NEXT: vsrai.w $vr1, $vr1, 24 +; LA32-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; LA32-NEXT: vsllwil.w.h $vr1, $vr1, 0 ; LA32-NEXT: vst $vr1, $a1, 16 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret @@ -249,15 +218,13 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA64-NEXT: vilvl.h $vr1, $vr0, $vr0 -; LA64-NEXT: vslli.w $vr1, $vr1, 24 -; LA64-NEXT: vsrai.w $vr1, $vr1, 24 -; LA64-NEXT: vilvh.h $vr0, $vr0, $vr0 -; LA64-NEXT: vslli.w $vr0, $vr0, 24 -; LA64-NEXT: vsrai.w $vr0, $vr0, 24 -; LA64-NEXT: vst $vr0, $a1, 16 -; LA64-NEXT: vst $vr1, $a1, 0 +; LA64-NEXT: vsrli.d $vr1, $vr0, 32 +; LA64-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; LA64-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; LA64-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: vst $vr1, $a1, 16 ; LA64-NEXT: ret entry: %A = load <8 x i8>, ptr %ptr @@ -273,50 +240,50 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) { ; LA32-NEXT: ld.w $a0, $a0, 4 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA32-NEXT: vilvl.h $vr1, $vr0, $vr0 -; LA32-NEXT: vilvl.w $vr2, $vr1, $vr1 -; LA32-NEXT: vslli.d $vr2, $vr2, 56 -; LA32-NEXT: vsrai.d $vr2, $vr2, 56 -; LA32-NEXT: vilvh.w $vr1, $vr1, $vr1 -; LA32-NEXT: vslli.d $vr1, $vr1, 56 -; LA32-NEXT: vsrai.d $vr1, $vr1, 56 -; LA32-NEXT: vilvh.h $vr0, $vr0, $vr0 -; LA32-NEXT: vilvl.w $vr3, $vr0, $vr0 -; LA32-NEXT: vslli.d $vr3, $vr3, 56 -; LA32-NEXT: vsrai.d $vr3, $vr3, 56 -; LA32-NEXT: vilvh.w $vr0, $vr0, $vr0 -; LA32-NEXT: vslli.d $vr0, $vr0, 56 -; LA32-NEXT: vsrai.d $vr0, $vr0, 56 -; LA32-NEXT: vst $vr0, $a1, 48 -; LA32-NEXT: vst $vr3, $a1, 32 +; LA32-NEXT: vshuf4i.b $vr1, $vr0, 14 +; LA32-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; LA32-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; LA32-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; LA32-NEXT: vsrli.d $vr2, $vr0, 32 +; LA32-NEXT: vsllwil.h.b $vr2, $vr2, 0 +; LA32-NEXT: vsllwil.w.h $vr2, $vr2, 0 +; LA32-NEXT: vsllwil.d.w $vr2, $vr2, 0 +; LA32-NEXT: vsrli.d $vr3, $vr0, 48 +; LA32-NEXT: vsllwil.h.b $vr3, $vr3, 0 +; LA32-NEXT: vsllwil.w.h $vr3, $vr3, 0 +; LA32-NEXT: vsllwil.d.w $vr3, $vr3, 0 +; LA32-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; LA32-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; LA32-NEXT: vsllwil.d.w $vr0, 
$vr0, 0 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: vst $vr3, $a1, 48 +; LA32-NEXT: vst $vr2, $a1, 32 ; LA32-NEXT: vst $vr1, $a1, 16 -; LA32-NEXT: vst $vr2, $a1, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: load_sext_8i8_to_8i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA64-NEXT: vilvl.h $vr1, $vr0, $vr0 -; LA64-NEXT: vilvl.w $vr2, $vr1, $vr1 -; LA64-NEXT: vslli.d $vr2, $vr2, 56 -; LA64-NEXT: vsrai.d $vr2, $vr2, 56 -; LA64-NEXT: vilvh.w $vr1, $vr1, $vr1 -; LA64-NEXT: vslli.d $vr1, $vr1, 56 -; LA64-NEXT: vsrai.d $vr1, $vr1, 56 -; LA64-NEXT: vilvh.h $vr0, $vr0, $vr0 -; LA64-NEXT: vilvl.w $vr3, $vr0, $vr0 -; LA64-NEXT: vslli.d $vr3, $vr3, 56 -; LA64-NEXT: vsrai.d $vr3, $vr3, 56 -; LA64-NEXT: vilvh.w $vr0, $vr0, $vr0 -; LA64-NEXT: vslli.d $vr0, $vr0, 56 -; LA64-NEXT: vsrai.d $vr0, $vr0, 56 -; LA64-NEXT: vst $vr0, $a1, 48 -; LA64-NEXT: vst $vr3, $a1, 32 +; LA64-NEXT: vshuf4i.b $vr1, $vr0, 14 +; LA64-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; LA64-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; LA64-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; LA64-NEXT: vsrli.d $vr2, $vr0, 32 +; LA64-NEXT: vsllwil.h.b $vr2, $vr2, 0 +; LA64-NEXT: vsllwil.w.h $vr2, $vr2, 0 +; LA64-NEXT: vsllwil.d.w $vr2, $vr2, 0 +; LA64-NEXT: vsrli.d $vr3, $vr0, 48 +; LA64-NEXT: vsllwil.h.b $vr3, $vr3, 0 +; LA64-NEXT: vsllwil.w.h $vr3, $vr3, 0 +; LA64-NEXT: vsllwil.d.w $vr3, $vr3, 0 +; LA64-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; LA64-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; LA64-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: vst $vr3, $a1, 48 +; LA64-NEXT: vst $vr2, $a1, 32 ; LA64-NEXT: vst $vr1, $a1, 16 -; LA64-NEXT: vst $vr2, $a1, 0 ; LA64-NEXT: ret entry: %A = load <8 x i8>, ptr %ptr @@ -329,14 +296,11 @@ define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_8i16_to_8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr1, $vr1, 16 -; CHECK-NEXT: vsrai.w $vr1, $vr1, 16 -; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr0, $vr0, 16 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr @@ -349,24 +313,21 @@ define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_8i16_to_8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr1 -; CHECK-NEXT: vslli.d $vr2, $vr2, 48 -; CHECK-NEXT: vsrai.d $vr2, $vr2, 48 -; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1 -; CHECK-NEXT: vslli.d $vr1, $vr1, 48 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 48 -; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr3, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr3, $vr3, 48 -; CHECK-NEXT: vsrai.d $vr3, $vr3, 48 -; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr0, $vr0, 48 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 -; CHECK-NEXT: vst $vr0, $a1, 48 -; CHECK-NEXT: vst $vr3, $a1, 32 +; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 +; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8 +; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0 +; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0 +; CHECK-NEXT: vbsrl.v $vr3, $vr0, 12 +; CHECK-NEXT: 
vsllwil.w.h $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr3, $a1, 48 +; CHECK-NEXT: vst $vr2, $a1, 32 ; CHECK-NEXT: vst $vr1, $a1, 16 -; CHECK-NEXT: vst $vr2, $a1, 0 ; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr @@ -380,22 +341,16 @@ define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a0, 16 -; CHECK-NEXT: vshuf4i.w $vr2, $vr0, 16 -; CHECK-NEXT: vslli.d $vr2, $vr2, 32 -; CHECK-NEXT: vsrai.d $vr2, $vr2, 32 -; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 50 -; CHECK-NEXT: vslli.d $vr0, $vr0, 32 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 -; CHECK-NEXT: vshuf4i.w $vr3, $vr1, 16 -; CHECK-NEXT: vslli.d $vr3, $vr3, 32 -; CHECK-NEXT: vsrai.d $vr3, $vr3, 32 -; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 50 -; CHECK-NEXT: vslli.d $vr1, $vr1, 32 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 32 -; CHECK-NEXT: vst $vr1, $a1, 48 -; CHECK-NEXT: vst $vr3, $a1, 32 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: vshuf4i.w $vr2, $vr0, 14 +; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0 +; CHECK-NEXT: vshuf4i.w $vr3, $vr1, 14 +; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; CHECK-NEXT: vst $vr1, $a1, 32 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr3, $a1, 48 +; CHECK-NEXT: vst $vr2, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <8 x i32>, ptr %ptr @@ -408,14 +363,11 @@ define void @load_sext_16i8_to_16i16(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_16i8_to_16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vilvl.b $vr1, $vr0, $vr0 -; CHECK-NEXT: vslli.h $vr1, $vr1, 8 -; CHECK-NEXT: vsrai.h $vr1, $vr1, 8 -; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.h $vr0, $vr0, 8 -; CHECK-NEXT: vsrai.h $vr0, $vr0, 8 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -428,24 +380,21 @@ define void @load_sext_16i8_to_16i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_16i8_to_16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vilvl.b $vr1, $vr0, $vr0 -; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr1 -; CHECK-NEXT: vslli.w $vr2, $vr2, 24 -; CHECK-NEXT: vsrai.w $vr2, $vr2, 24 -; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1 -; CHECK-NEXT: vslli.w $vr1, $vr1, 24 -; CHECK-NEXT: vsrai.w $vr1, $vr1, 24 -; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.h $vr3, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr3, $vr3, 24 -; CHECK-NEXT: vsrai.w $vr3, $vr3, 24 -; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr0, $vr0, 24 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 -; CHECK-NEXT: vst $vr0, $a1, 48 -; CHECK-NEXT: vst $vr3, $a1, 32 +; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 +; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8 +; CHECK-NEXT: vsllwil.h.b $vr2, $vr2, 0 +; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0 +; CHECK-NEXT: vbsrl.v $vr3, $vr0, 12 +; CHECK-NEXT: vsllwil.h.b $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst 
$vr3, $a1, 48 +; CHECK-NEXT: vst $vr2, $a1, 32 ; CHECK-NEXT: vst $vr1, $a1, 16 -; CHECK-NEXT: vst $vr2, $a1, 0 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -458,44 +407,45 @@ define void @load_sext_16i8_to_16i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_16i8_to_16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vilvl.b $vr1, $vr0, $vr0 -; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr1 -; CHECK-NEXT: vilvl.w $vr3, $vr2, $vr2 -; CHECK-NEXT: vslli.d $vr3, $vr3, 56 -; CHECK-NEXT: vsrai.d $vr3, $vr3, 56 -; CHECK-NEXT: vilvh.w $vr2, $vr2, $vr2 -; CHECK-NEXT: vslli.d $vr2, $vr2, 56 -; CHECK-NEXT: vsrai.d $vr2, $vr2, 56 -; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1 -; CHECK-NEXT: vilvl.w $vr4, $vr1, $vr1 -; CHECK-NEXT: vslli.d $vr4, $vr4, 56 -; CHECK-NEXT: vsrai.d $vr4, $vr4, 56 -; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1 -; CHECK-NEXT: vslli.d $vr1, $vr1, 56 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 56 -; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.h $vr5, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr6, $vr5, $vr5 -; CHECK-NEXT: vslli.d $vr6, $vr6, 56 -; CHECK-NEXT: vsrai.d $vr6, $vr6, 56 -; CHECK-NEXT: vilvh.w $vr5, $vr5, $vr5 -; CHECK-NEXT: vslli.d $vr5, $vr5, 56 -; CHECK-NEXT: vsrai.d $vr5, $vr5, 56 -; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr7, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr7, $vr7, 56 -; CHECK-NEXT: vsrai.d $vr7, $vr7, 56 -; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr0, $vr0, 56 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 -; CHECK-NEXT: vst $vr0, $a1, 112 -; CHECK-NEXT: vst $vr7, $a1, 96 +; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 +; CHECK-NEXT: vsllwil.h.b $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; CHECK-NEXT: vsrli.d $vr2, $vr0, 32 +; CHECK-NEXT: vsllwil.h.b $vr2, $vr2, 0 +; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0 +; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0 +; CHECK-NEXT: vsrli.d $vr3, $vr0, 48 +; CHECK-NEXT: vsllwil.h.b $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0 +; CHECK-NEXT: vbsrl.v $vr4, $vr0, 8 +; CHECK-NEXT: vsllwil.h.b $vr4, $vr4, 0 +; CHECK-NEXT: vsllwil.w.h $vr4, $vr4, 0 +; CHECK-NEXT: vsllwil.d.w $vr4, $vr4, 0 +; CHECK-NEXT: vbsrl.v $vr5, $vr0, 10 +; CHECK-NEXT: vsllwil.h.b $vr5, $vr5, 0 +; CHECK-NEXT: vsllwil.w.h $vr5, $vr5, 0 +; CHECK-NEXT: vsllwil.d.w $vr5, $vr5, 0 +; CHECK-NEXT: vbsrl.v $vr6, $vr0, 12 +; CHECK-NEXT: vsllwil.h.b $vr6, $vr6, 0 +; CHECK-NEXT: vsllwil.w.h $vr6, $vr6, 0 +; CHECK-NEXT: vsllwil.d.w $vr6, $vr6, 0 +; CHECK-NEXT: vbsrl.v $vr7, $vr0, 14 +; CHECK-NEXT: vsllwil.h.b $vr7, $vr7, 0 +; CHECK-NEXT: vsllwil.w.h $vr7, $vr7, 0 +; CHECK-NEXT: vsllwil.d.w $vr7, $vr7, 0 +; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr7, $a1, 112 +; CHECK-NEXT: vst $vr6, $a1, 96 ; CHECK-NEXT: vst $vr5, $a1, 80 -; CHECK-NEXT: vst $vr6, $a1, 64 -; CHECK-NEXT: vst $vr1, $a1, 48 -; CHECK-NEXT: vst $vr4, $a1, 32 -; CHECK-NEXT: vst $vr2, $a1, 16 -; CHECK-NEXT: vst $vr3, $a1, 0 +; CHECK-NEXT: vst $vr4, $a1, 64 +; CHECK-NEXT: vst $vr3, $a1, 48 +; CHECK-NEXT: vst $vr2, $a1, 32 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -509,22 +459,16 @@ define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a0, 16 -; CHECK-NEXT: vilvl.h $vr2, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr2, 
$vr2, 16 -; CHECK-NEXT: vsrai.w $vr2, $vr2, 16 -; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr0, $vr0, 16 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 -; CHECK-NEXT: vilvl.h $vr3, $vr1, $vr1 -; CHECK-NEXT: vslli.w $vr3, $vr3, 16 -; CHECK-NEXT: vsrai.w $vr3, $vr3, 16 -; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1 -; CHECK-NEXT: vslli.w $vr1, $vr1, 16 -; CHECK-NEXT: vsrai.w $vr1, $vr1, 16 -; CHECK-NEXT: vst $vr1, $a1, 48 -; CHECK-NEXT: vst $vr3, $a1, 32 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: vbsrl.v $vr2, $vr0, 8 +; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0 +; CHECK-NEXT: vbsrl.v $vr3, $vr1, 8 +; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; CHECK-NEXT: vst $vr1, $a1, 32 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr3, $a1, 48 +; CHECK-NEXT: vst $vr2, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <16 x i16>, ptr %ptr @@ -538,42 +482,36 @@ define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a0, 16 -; CHECK-NEXT: vilvl.h $vr2, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr3, $vr2, $vr2 -; CHECK-NEXT: vslli.d $vr3, $vr3, 48 -; CHECK-NEXT: vsrai.d $vr3, $vr3, 48 -; CHECK-NEXT: vilvh.w $vr2, $vr2, $vr2 -; CHECK-NEXT: vslli.d $vr2, $vr2, 48 -; CHECK-NEXT: vsrai.d $vr2, $vr2, 48 -; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr4, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr4, $vr4, 48 -; CHECK-NEXT: vsrai.d $vr4, $vr4, 48 -; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr0, $vr0, 48 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 -; CHECK-NEXT: vilvl.h $vr5, $vr1, $vr1 -; CHECK-NEXT: vilvl.w $vr6, $vr5, $vr5 -; CHECK-NEXT: vslli.d $vr6, $vr6, 48 -; CHECK-NEXT: vsrai.d $vr6, $vr6, 48 -; CHECK-NEXT: vilvh.w $vr5, $vr5, $vr5 -; CHECK-NEXT: vslli.d $vr5, $vr5, 48 -; CHECK-NEXT: vsrai.d $vr5, $vr5, 48 -; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1 -; CHECK-NEXT: vilvl.w $vr7, $vr1, $vr1 -; CHECK-NEXT: vslli.d $vr7, $vr7, 48 -; CHECK-NEXT: vsrai.d $vr7, $vr7, 48 -; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1 -; CHECK-NEXT: vslli.d $vr1, $vr1, 48 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 48 -; CHECK-NEXT: vst $vr1, $a1, 112 -; CHECK-NEXT: vst $vr7, $a1, 96 +; CHECK-NEXT: vshuf4i.h $vr2, $vr0, 14 +; CHECK-NEXT: vsllwil.w.h $vr2, $vr2, 0 +; CHECK-NEXT: vsllwil.d.w $vr2, $vr2, 0 +; CHECK-NEXT: vbsrl.v $vr3, $vr0, 8 +; CHECK-NEXT: vsllwil.w.h $vr3, $vr3, 0 +; CHECK-NEXT: vsllwil.d.w $vr3, $vr3, 0 +; CHECK-NEXT: vbsrl.v $vr4, $vr0, 12 +; CHECK-NEXT: vsllwil.w.h $vr4, $vr4, 0 +; CHECK-NEXT: vsllwil.d.w $vr4, $vr4, 0 +; CHECK-NEXT: vshuf4i.h $vr5, $vr1, 14 +; CHECK-NEXT: vsllwil.w.h $vr5, $vr5, 0 +; CHECK-NEXT: vsllwil.d.w $vr5, $vr5, 0 +; CHECK-NEXT: vbsrl.v $vr6, $vr1, 8 +; CHECK-NEXT: vsllwil.w.h $vr6, $vr6, 0 +; CHECK-NEXT: vsllwil.d.w $vr6, $vr6, 0 +; CHECK-NEXT: vbsrl.v $vr7, $vr1, 12 +; CHECK-NEXT: vsllwil.w.h $vr7, $vr7, 0 +; CHECK-NEXT: vsllwil.d.w $vr7, $vr7, 0 +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 0 +; CHECK-NEXT: vsllwil.w.h $vr1, $vr1, 0 +; CHECK-NEXT: vsllwil.d.w $vr1, $vr1, 0 +; CHECK-NEXT: vst $vr1, $a1, 64 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr7, $a1, 112 +; CHECK-NEXT: vst $vr6, $a1, 96 ; CHECK-NEXT: vst $vr5, $a1, 80 -; CHECK-NEXT: vst $vr6, $a1, 64 -; CHECK-NEXT: vst $vr0, $a1, 48 -; CHECK-NEXT: vst $vr4, $a1, 32 +; CHECK-NEXT: vst $vr4, $a1, 48 +; CHECK-NEXT: vst $vr3, $a1, 32 ; CHECK-NEXT: vst $vr2, $a1, 16 -; CHECK-NEXT: vst $vr3, $a1, 
0
 ; CHECK-NEXT:    ret
 entry:
   %A = load <16 x i16>, ptr %ptr
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll
index 5e0ff9a..ee1374a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll
@@ -7,10 +7,9 @@ define void @shuffle_sign_ext_2i8_to_2i64(ptr %ptr, ptr %dst) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ld.h $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
   %x = load <2 x i8>, ptr %ptr
@@ -25,9 +24,8 @@ define void @shuffle_sign_ext_2i16_to_2i64(ptr %ptr, ptr %dst) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
   %x = load <2 x i16>, ptr %ptr
@@ -42,9 +40,9 @@ define void @shuffle_sign_ext_2i32_to_2i64(ptr %ptr, ptr %dst) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    ld.w $a2, $a0, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
-; LA32-NEXT:    vrepli.b $vr0, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
@@ -52,8 +50,7 @@ define void @shuffle_sign_ext_2i32_to_2i64(ptr %ptr, ptr %dst) nounwind {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
   %x = load <2 x i32>, ptr %ptr
@@ -68,9 +65,8 @@ define void @shuffle_sign_ext_4i8_to_4i32(ptr %ptr, ptr %dst) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
   %x = load <4 x i8>, ptr %ptr
@@ -87,8 +83,7 @@ define void @shuffle_sign_ext_4i16_to_4i32(ptr %ptr, ptr %dst) nounwind {
 ; LA32-NEXT:    ld.w $a0, $a0, 4
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
@@ -96,8 +91,7 @@ define void @shuffle_sign_ext_4i16_to_4i32(ptr %ptr, ptr %dst) nounwind {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
   %x = load <4 x i16>, ptr %ptr
@@ -114,8 +108,7 @@ define void @shuffle_sign_ext_8i8_to_8i16(ptr %ptr, ptr %dst) nounwind {
 ; LA32-NEXT:    ld.w $a0, $a0, 4
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
@@ -123,8 +116,7 @@ define void @shuffle_sign_ext_8i8_to_8i16(ptr %ptr, ptr %dst) nounwind {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
   %x = load <8 x i8>, ptr %ptr
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
index 2ace0bf..7156e61 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
@@ -7,10 +7,9 @@ define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.h $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -25,9 +24,8 @@ define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -42,9 +40,9 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    ld.w $a2, $a0, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
-; LA32-NEXT:    vrepli.b $vr0, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
@@ -52,8 +50,7 @@ define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
 entry:
@@ -68,9 +65,8 @@ define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -85,13 +81,15 @@ define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    vshuf4i.b $vr1, $vr0, 14
+; CHECK-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <4 x i8>, ptr %ptr
@@ -107,8 +105,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA32-NEXT:    ld.w $a0, $a0, 4
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
@@ -116,8 +113,7 @@ define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
 entry:
@@ -131,14 +127,13 @@ define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
 ; LA32-LABEL: load_zext_4i16_to_4i64:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    ld.w $a2, $a0, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; LA32-NEXT:    vilvl.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT:    vilvl.h $vr2, $vr1, $vr2
-; LA32-NEXT:    vilvl.w $vr1, $vr1, $vr2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; LA32-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; LA32-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
 ; LA32-NEXT:    vst $vr1, $a1, 16
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
@@ -147,12 +142,13 @@ define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; LA64-NEXT:    vilvl.w $vr2, $vr1, $vr0
-; LA64-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vst $vr0, $a1, 16
-; LA64-NEXT:    vst $vr2, $a1, 0
+; LA64-NEXT:    vshuf4i.h $vr1, $vr0, 14
+; LA64-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; LA64-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; LA64-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; LA64-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    vst $vr1, $a1, 16
 ; LA64-NEXT:    ret
 entry:
   %A = load <4 x i16>, ptr %ptr
@@ -165,11 +161,11 @@ define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_zext_4i32_to_4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.w $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    vshuf4i.w $vr1, $vr0, 14
+; CHECK-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <4 x i32>, ptr %ptr
@@ -185,8 +181,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA32-NEXT:    ld.w $a0, $a0, 4
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
@@ -194,8 +189,7 @@ define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
 entry:
@@ -209,14 +203,13 @@ define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
 ; LA32-LABEL: load_zext_8i8_to_8i32:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    ld.w $a2, $a0, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; LA32-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
-; LA32-NEXT:    vilvl.b $vr2, $vr1, $vr2
-; LA32-NEXT:    vilvl.h $vr1, $vr1, $vr2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
 ; LA32-NEXT:    vst $vr1, $a1, 16
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
@@ -225,12 +218,13 @@ define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; LA64-NEXT:    vilvl.h $vr2, $vr1, $vr0
-; LA64-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; LA64-NEXT:    vst $vr0, $a1, 16
-; LA64-NEXT:    vst $vr2, $a1, 0
+; LA64-NEXT:    vsrli.d $vr1, $vr0, 32
+; LA64-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; LA64-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; LA64-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; LA64-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    vst $vr1, $a1, 16
 ; LA64-NEXT:    ret
 entry:
   %A = load <8 x i8>, ptr %ptr
@@ -246,36 +240,50 @@ define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
 ; LA32-NEXT:    ld.w $a0, $a0, 4
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; LA32-NEXT:    vilvl.h $vr2, $vr1, $vr0
-; LA32-NEXT:    vilvl.w $vr3, $vr1, $vr2
-; LA32-NEXT:    vilvh.w $vr2, $vr1, $vr2
-; LA32-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; LA32-NEXT:    vilvl.w $vr4, $vr1, $vr0
-; LA32-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; LA32-NEXT:    vst $vr0, $a1, 48
-; LA32-NEXT:    vst $vr4, $a1, 32
-; LA32-NEXT:    vst $vr2, $a1, 16
-; LA32-NEXT:    vst $vr3, $a1, 0
+; LA32-NEXT:    vshuf4i.b $vr1, $vr0, 14
+; LA32-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; LA32-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; LA32-NEXT:    vsrli.d $vr2, $vr0, 32
+; LA32-NEXT:    vsllwil.hu.bu $vr2, $vr2, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr2, $vr2, 0
+; LA32-NEXT:    vsllwil.du.wu $vr2, $vr2, 0
+; LA32-NEXT:    vsrli.d $vr3, $vr0, 48
+; LA32-NEXT:    vsllwil.hu.bu $vr3, $vr3, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr3, $vr3, 0
+; LA32-NEXT:    vsllwil.du.wu $vr3, $vr3, 0
+; LA32-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; LA32-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; LA32-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    vst $vr3, $a1, 48
+; LA32-NEXT:    vst $vr2, $a1, 32
+; LA32-NEXT:    vst $vr1, $a1, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_zext_8i8_to_8i64:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
-; LA64-NEXT:    vilvl.h $vr2, $vr1, $vr0
-; LA64-NEXT:    vilvl.w $vr3, $vr1, $vr2
-; LA64-NEXT:    vilvh.w $vr2, $vr1, $vr2
-; LA64-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; LA64-NEXT:    vilvl.w $vr4, $vr1, $vr0
-; LA64-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; LA64-NEXT:    vst $vr0, $a1, 48
-; LA64-NEXT:    vst $vr4, $a1, 32
-; LA64-NEXT:    vst $vr2, $a1, 16
-; LA64-NEXT:    vst $vr3, $a1, 0
+; LA64-NEXT:    vshuf4i.b $vr1, $vr0, 14
+; LA64-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; LA64-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; LA64-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; LA64-NEXT:    vsrli.d $vr2, $vr0, 32
+; LA64-NEXT:    vsllwil.hu.bu $vr2, $vr2, 0
+; LA64-NEXT:    vsllwil.wu.hu $vr2, $vr2, 0
+; LA64-NEXT:    vsllwil.du.wu $vr2, $vr2, 0
+; LA64-NEXT:    vsrli.d $vr3, $vr0, 48
+; LA64-NEXT:    vsllwil.hu.bu $vr3, $vr3, 0
+; LA64-NEXT:    vsllwil.wu.hu $vr3, $vr3, 0
+; LA64-NEXT:    vsllwil.du.wu $vr3, $vr3, 0
+; LA64-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; LA64-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; LA64-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    vst $vr3, $a1, 48
+; LA64-NEXT:    vst $vr2, $a1, 32
+; LA64-NEXT:    vst $vr1, $a1, 16
 ; LA64-NEXT:    ret
 entry:
   %A = load <8 x i8>, ptr %ptr
@@ -288,11 +296,11 @@ define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_zext_8i16_to_8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.h $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <8 x i16>, ptr %ptr
@@ -305,17 +313,21 @@ define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_zext_8i16_to_8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.h $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr3, $vr1, $vr2
-; CHECK-NEXT:    vilvh.w $vr2, $vr1, $vr2
-; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr4, $vr1, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 48
-; CHECK-NEXT:    vst $vr4, $a1, 32
-; CHECK-NEXT:    vst $vr2, $a1, 16
-; CHECK-NEXT:    vst $vr3, $a1, 0
+; CHECK-NEXT:    vshuf4i.h $vr1, $vr0, 14
+; CHECK-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT:    vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT:    vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT:    vbsrl.v $vr3, $vr0, 12
+; CHECK-NEXT:    vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr3, $a1, 48
+; CHECK-NEXT:    vst $vr2, $a1, 32
+; CHECK-NEXT:    vst $vr1, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <8 x i16>, ptr %ptr
@@ -329,15 +341,16 @@ define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
 ; CHECK-NEXT:    vld $vr1, $a0, 16
-; CHECK-NEXT:    vrepli.b $vr2, 0
-; CHECK-NEXT:    vilvl.w $vr3, $vr2, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr2, $vr0
-; CHECK-NEXT:    vilvl.w $vr4, $vr2, $vr1
-; CHECK-NEXT:    vilvh.w $vr1, $vr2, $vr1
-; CHECK-NEXT:    vst $vr1, $a1, 48
-; CHECK-NEXT:    vst $vr4, $a1, 32
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr3, $a1, 0
+; CHECK-NEXT:    vshuf4i.w $vr2, $vr0, 14
+; CHECK-NEXT:    vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT:    vshuf4i.w $vr3, $vr1, 14
+; CHECK-NEXT:    vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 32
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr3, $a1, 48
+; CHECK-NEXT:    vst $vr2, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <8 x i32>, ptr %ptr
@@ -350,11 +363,11 @@ define void @load_zext_16i8_to_16i16(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_zext_16i8_to_16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvh.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <16 x i8>, ptr %ptr
@@ -367,17 +380,21 @@ define void @load_zext_16i8_to_16i32(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_zext_16i8_to_16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr3, $vr1, $vr2
-; CHECK-NEXT:    vilvh.h $vr2, $vr1, $vr2
-; CHECK-NEXT:    vilvh.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr4, $vr1, $vr0
-; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 48
-; CHECK-NEXT:    vst $vr4, $a1, 32
-; CHECK-NEXT:    vst $vr2, $a1, 16
-; CHECK-NEXT:    vst $vr3, $a1, 0
+; CHECK-NEXT:    vsrli.d $vr1, $vr0, 32
+; CHECK-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT:    vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT:    vsllwil.hu.bu $vr2, $vr2, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT:    vbsrl.v $vr3, $vr0, 12
+; CHECK-NEXT:    vsllwil.hu.bu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr3, $a1, 48
+; CHECK-NEXT:    vst $vr2, $a1, 32
+; CHECK-NEXT:    vst $vr1, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <16 x i8>, ptr %ptr
@@ -390,29 +407,45 @@ define void @load_zext_16i8_to_16i64(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_zext_16i8_to_16i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr3, $vr1, $vr2
-; CHECK-NEXT:    vilvl.w $vr4, $vr1, $vr3
-; CHECK-NEXT:    vilvh.w $vr3, $vr1, $vr3
-; CHECK-NEXT:    vilvh.h $vr2, $vr1, $vr2
-; CHECK-NEXT:    vilvl.w $vr5, $vr1, $vr2
-; CHECK-NEXT:    vilvh.w $vr2, $vr1, $vr2
-; CHECK-NEXT:    vilvh.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr6, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr7, $vr1, $vr6
-; CHECK-NEXT:    vilvh.w $vr6, $vr1, $vr6
-; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr8, $vr1, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 112
-; CHECK-NEXT:    vst $vr8, $a1, 96
-; CHECK-NEXT:    vst $vr6, $a1, 80
-; CHECK-NEXT:    vst $vr7, $a1, 64
-; CHECK-NEXT:    vst $vr2, $a1, 48
-; CHECK-NEXT:    vst $vr5, $a1, 32
-; CHECK-NEXT:    vst $vr3, $a1, 16
-; CHECK-NEXT:    vst $vr4, $a1, 0
+; CHECK-NEXT:    vshuf4i.b $vr1, $vr0, 14
+; CHECK-NEXT:    vsllwil.hu.bu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT:    vsrli.d $vr2, $vr0, 32
+; CHECK-NEXT:    vsllwil.hu.bu $vr2, $vr2, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT:    vsrli.d $vr3, $vr0, 48
+; CHECK-NEXT:    vsllwil.hu.bu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT:    vbsrl.v $vr4, $vr0, 8
+; CHECK-NEXT:    vsllwil.hu.bu $vr4, $vr4, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr4, $vr4, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr4, $vr4, 0
+; CHECK-NEXT:    vbsrl.v $vr5, $vr0, 10
+; CHECK-NEXT:    vsllwil.hu.bu $vr5, $vr5, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr5, $vr5, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr5, $vr5, 0
+; CHECK-NEXT:    vbsrl.v $vr6, $vr0, 12
+; CHECK-NEXT:    vsllwil.hu.bu $vr6, $vr6, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr6, $vr6, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr6, $vr6, 0
+; CHECK-NEXT:    vbsrl.v $vr7, $vr0, 14
+; CHECK-NEXT:    vsllwil.hu.bu $vr7, $vr7, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr7, $vr7, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr7, $vr7, 0
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr7, $a1, 112
+; CHECK-NEXT:    vst $vr6, $a1, 96
+; CHECK-NEXT:    vst $vr5, $a1, 80
+; CHECK-NEXT:    vst $vr4, $a1, 64
+; CHECK-NEXT:    vst $vr3, $a1, 48
+; CHECK-NEXT:    vst $vr2, $a1, 32
+; CHECK-NEXT:    vst $vr1, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <16 x i8>, ptr %ptr
@@ -426,15 +459,16 @@ define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
 ; CHECK-NEXT:    vld $vr1, $a0, 16
-; CHECK-NEXT:    vrepli.b $vr2, 0
-; CHECK-NEXT:    vilvl.h $vr3, $vr2, $vr0
-; CHECK-NEXT:    vilvh.h $vr0, $vr2, $vr0
-; CHECK-NEXT:    vilvl.h $vr4, $vr2, $vr1
-; CHECK-NEXT:    vilvh.h $vr1, $vr2, $vr1
-; CHECK-NEXT:    vst $vr1, $a1, 48
-; CHECK-NEXT:    vst $vr4, $a1, 32
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr3, $a1, 0
+; CHECK-NEXT:    vbsrl.v $vr2, $vr0, 8
+; CHECK-NEXT:    vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT:    vbsrl.v $vr3, $vr1, 8
+; CHECK-NEXT:    vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 32
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr3, $a1, 48
+; CHECK-NEXT:    vst $vr2, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <16 x i16>, ptr %ptr
@@ -448,27 +482,36 @@ define void @load_zext_16i16_to_16i64(ptr %ptr, ptr %dst) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
 ; CHECK-NEXT:    vld $vr1, $a0, 16
-; CHECK-NEXT:    vrepli.b $vr2, 0
-; CHECK-NEXT:    vilvl.h $vr3, $vr2, $vr0
-; CHECK-NEXT:    vilvl.w $vr4, $vr2, $vr3
-; CHECK-NEXT:    vilvh.w $vr3, $vr2, $vr3
-; CHECK-NEXT:    vilvh.h $vr0, $vr2, $vr0
-; CHECK-NEXT:    vilvl.w $vr5, $vr2, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr2, $vr0
-; CHECK-NEXT:    vilvl.h $vr6, $vr2, $vr1
-; CHECK-NEXT:    vilvl.w $vr7, $vr2, $vr6
-; CHECK-NEXT:    vilvh.w $vr6, $vr2, $vr6
-; CHECK-NEXT:    vilvh.h $vr1, $vr2, $vr1
-; CHECK-NEXT:    vilvl.w $vr8, $vr2, $vr1
-; CHECK-NEXT:    vilvh.w $vr1, $vr2, $vr1
-; CHECK-NEXT:    vst $vr1, $a1, 112
-; CHECK-NEXT:    vst $vr8, $a1, 96
-; CHECK-NEXT:    vst $vr6, $a1, 80
-; CHECK-NEXT:    vst $vr7, $a1, 64
-; CHECK-NEXT:    vst $vr0, $a1, 48
-; CHECK-NEXT:    vst $vr5, $a1, 32
-; CHECK-NEXT:    vst $vr3, $a1, 16
-; CHECK-NEXT:    vst $vr4, $a1, 0
+; CHECK-NEXT:    vshuf4i.h $vr2, $vr0, 14
+; CHECK-NEXT:    vsllwil.wu.hu $vr2, $vr2, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr2, $vr2, 0
+; CHECK-NEXT:    vbsrl.v $vr3, $vr0, 8
+; CHECK-NEXT:    vsllwil.wu.hu $vr3, $vr3, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr3, $vr3, 0
+; CHECK-NEXT:    vbsrl.v $vr4, $vr0, 12
+; CHECK-NEXT:    vsllwil.wu.hu $vr4, $vr4, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr4, $vr4, 0
+; CHECK-NEXT:    vshuf4i.h $vr5, $vr1, 14
+; CHECK-NEXT:    vsllwil.wu.hu $vr5, $vr5, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr5, $vr5, 0
+; CHECK-NEXT:    vbsrl.v $vr6, $vr1, 8
+; CHECK-NEXT:    vsllwil.wu.hu $vr6, $vr6, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr6, $vr6, 0
+; CHECK-NEXT:    vbsrl.v $vr7, $vr1, 12
+; CHECK-NEXT:    vsllwil.wu.hu $vr7, $vr7, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr7, $vr7, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.wu.hu $vr1, $vr1, 0
+; CHECK-NEXT:    vsllwil.du.wu $vr1, $vr1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 64
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr7, $a1, 112
+; CHECK-NEXT:    vst $vr6, $a1, 96
+; CHECK-NEXT:    vst $vr5, $a1, 80
+; CHECK-NEXT:    vst $vr4, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
+; CHECK-NEXT:    vst $vr2, $a1, 16
 ; CHECK-NEXT:    ret
 entry:
   %A = load <16 x i16>, ptr %ptr
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
index 8bdeebe..57b382a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
@@ -178,10 +178,9 @@ define i2 @vmsk_sgt_v2i8(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: vmsk_sgt_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.d.w $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.d $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -194,9 +193,8 @@ define i2 @vmsk_sgt_v2i16(<2 x i16> %a, <2 x i16> %b) {
 ; CHECK-LABEL: vmsk_sgt_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vslt.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.d.w $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.d $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -209,8 +207,7 @@ define i2 @vmsk_sgt_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: vmsk_sgt_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vslt.w $vr0, $vr1, $vr0
-; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 16
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 32
+; CHECK-NEXT:    vsllwil.d.w $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.d $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -247,9 +244,8 @@ define i4 @vmsk_sgt_v4i8(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-LABEL: vmsk_sgt_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.w $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -262,8 +258,7 @@ define i4 @vmsk_sgt_v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: vmsk_sgt_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vslt.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 16
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.w $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -300,8 +295,7 @@ define i8 @vmsk_sgt_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: vmsk_sgt_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.h $vr0, $vr0, 8
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.h $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -340,10 +334,9 @@ define i2 @vmsk_sgt_and_sgt_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8>
 ; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vslt.b $vr1, $vr3, $vr2
 ; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.d.w $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.d $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -360,9 +353,8 @@ define i2 @vmsk_sgt_and_sgt_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
 ; CHECK-NEXT:    vslt.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vslt.h $vr1, $vr3, $vr2
 ; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.d.w $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.d $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -379,8 +371,7 @@ define i2 @vmsk_sgt_and_sgt_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
 ; CHECK-NEXT:    vslt.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vslt.w $vr1, $vr3, $vr2
 ; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 16
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 32
+; CHECK-NEXT:    vsllwil.d.w $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.d $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -429,9 +420,8 @@ define i4 @vmsk_sgt_and_sgt_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
 ; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vslt.b $vr1, $vr3, $vr2
 ; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.w $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -448,8 +438,7 @@ define i4 @vmsk_sgt_and_sgt_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x
 ; CHECK-NEXT:    vslt.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vslt.h $vr1, $vr3, $vr2
 ; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 16
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.w $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -498,8 +487,7 @@ define i8 @vmsk_sgt_and_sgt_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8>
 ; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vslt.b $vr1, $vr3, $vr2
 ; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.h $vr0, $vr0, 8
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.h $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
@@ -594,9 +582,8 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: vmsk_eq_allzeros_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vseqi.b $vr0, $vr0, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 0
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 0
 ; CHECK-NEXT:    vmskltz.w $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
 ; CHECK-NEXT:    ret
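For reference, a minimal IR sketch (mine, not part of the patch) that exercises the new lowering, mirroring the load_zext_8i8_to_8i16 test above. Assuming a RUN line along the lines of llc -mtriple=loongarch64 -mattr=+lsx, the zero extension should now select a single vsllwil.hu.bu instead of the old vrepli.b/vilvl.b pair:

define void @zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
entry:
  ; load eight i8 elements, widen each to i16, store the 128-bit result
  %a = load <8 x i8>, ptr %ptr
  %b = zext <8 x i8> %a to <8 x i16>
  store <8 x i16> %b, ptr %dst
  ret void
}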