; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: shuffle_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 11
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i16> %s
}

define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: shuffle_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 203
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  ret <8 x i32> %s
}

define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_xv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 9
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vslideup.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vslideup.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vmv.v.i v0, 12
; CHECK-NEXT:    vmv.v.i v8, 5
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_shuffle_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 2
; CHECK-NEXT:    vslidedown.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vmv.v.i v0, 3
; CHECK-NEXT:    vmv.v.i v9, 5
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x i16> %s
}

define <8 x i64> @vrgather_permute_shuffle_vu_v8i64(<8 x i64> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> poison, <8 x i32> <i32 1, i32 2, i32 0, i32 1, i32 7, i32 6, i32 0, i32 1>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_permute_shuffle_uv_v8i64(<8 x i64> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i64> poison, <8 x i64> %x, <8 x i32> <i32 9, i32 10, i32 8, i32 9, i32 15, i32 14, i32 8, i32 9>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI11_0)
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vle16.v v20, (a0)
; RV32-NEXT:    vmv.v.i v21, 2
; RV32-NEXT:    li a0, 164
; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32-NEXT:    vrgatherei16.vv v16, v8, v20
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    li a0, 5
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v8, v21, a0
; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT:    vrgatherei16.vv v16, v12, v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vv_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 164
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, 327683
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 17
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    vmv.v.x v20, a0
; RV64-NEXT:    lui a0, 163841
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 17
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vrgatherei16.vv v16, v8, v20
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgatherei16.vv v16, v12, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> <i32 1, i32 2, i32 10, i32 5, i32 1, i32 10, i32 3, i32 13>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_xv_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_0)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT:    vle16.v v20, (a0)
; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_1)
; RV32-NEXT:    vle16.v v21, (a0)
; RV32-NEXT:    vmv.v.i v16, -1
; RV32-NEXT:    li a0, 113
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vrgatherei16.vv v12, v16, v21
; RV32-NEXT:    vrgatherei16.vv v12, v8, v20, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_xv_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 113
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, 98305
; RV64-NEXT:    slli a0, a0, 6
; RV64-NEXT:    vmv.v.x v16, a0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vmv.v.i v12, -1
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x, <8 x i32> <i32 8, i32 3, i32 6, i32 5, i32 8, i32 12, i32 14, i32 3>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI13_1)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_1)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT:    vle16.v v16, (a0)
; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_0)
; RV32-NEXT:    vle16.v v17, (a0)
; RV32-NEXT:    li a0, 140
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vrgatherei16.vv v12, v8, v16
; RV32-NEXT:    vmv.v.i v8, 5
; RV32-NEXT:    vrgatherei16.vv v12, v8, v17, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI13_0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vle16.v v16, (a0)
; RV64-NEXT:    li a0, 115
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vmv.v.i v12, 5
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i32> <i32 0, i32 3, i32 10, i32 9, i32 4, i32 1, i32 7, i32 14>
  ret <8 x i64> %s
}

define <4 x i16> @shuffle_v8i16_to_vslidedown_1(<8 x i16> %x) {
; CHECK-LABEL: shuffle_v8i16_to_vslidedown_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i16> %s
}

define <4 x i16> @shuffle_v8i16_to_vslidedown_3(<8 x i16> %x) {
; CHECK-LABEL: shuffle_v8i16_to_vslidedown_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  ret <4 x i16> %s
}

define <2 x i32> @shuffle_v4i32_to_vslidedown(<4 x i32> %x) {
; CHECK-LABEL: shuffle_v4i32_to_vslidedown:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
  ret <2 x i32> %s
}

define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
; CHECK-LABEL: interleave_shuffles:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 0
; CHECK-NEXT:    vrgather.vi v10, v8, 1
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vwaddu.vv v8, v9, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
; CHECK-NEXT:    ret
  %y = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %z = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %w = shufflevector <4 x i8> %y, <4 x i8> %z, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i8> %w
}

define <8 x i8> @splat_ve4(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 4
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4_ins_i0ve2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    li a0, 2
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4_ins_i1ve3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 3
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v10, v9, 1
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 2
; CHECK-NEXT:    li a0, 4
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v11, a0
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 4, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 4
; CHECK-NEXT:    li a0, 67
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 12, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 8256
; CHECK-NEXT:    addi a0, a0, 514
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v11, a0
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 0
; CHECK-NEXT:    li a0, 70
; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v11, v10, 2
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 12, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI26_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI26_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    li a0, 20
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vrgather.vv v8, v9, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 14, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @widen_splat_ve3(<4 x i8> %v) {
; CHECK-LABEL: widen_splat_ve3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 3
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuf = shufflevector <4 x i8> %v, <4 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i8> %shuf
}

define <4 x i16> @slidedown_v4i16(<4 x i16> %x) {
; CHECK-LABEL: slidedown_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
  ret <4 x i16> %s
}

define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
; CHECK-LABEL: slidedown_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison>
  ret <8 x i32> %s
}

define <4 x i16> @slideup_v4i16(<4 x i16> %x) {
; CHECK-LABEL: slideup_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
  ret <4 x i16> %s
}

define <8 x i32> @slideup_v8i32(<8 x i32> %x) {
; CHECK-LABEL: slideup_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %s
}

define <8 x i16> @splice_unary(<8 x i16> %x) {
; CHECK-LABEL: splice_unary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vslideup.vi v9, v8, 6
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  ret <8 x i16> %s
}

define <8 x i32> @splice_unary2(<8 x i32> %x) {
; CHECK-LABEL: splice_unary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 5
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %s
}

define <8 x i16> @splice_binary(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: splice_binary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vslideup.vi v8, v9, 6
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  ret <8 x i16> %s
}

define <8 x i32> @splice_binary2(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: splice_binary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
  ret <8 x i32> %s
}

define <4 x i16> @shuffle_shuffle_vslidedown(<16 x i16> %0) {
; CHECK-LABEL: shuffle_shuffle_vslidedown:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    ret
entry:
  %1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
  %4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i16> %5
}

define <8 x i8> @concat_4xi8_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @concat_4xi8_start_undef(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 9, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start_undef_at_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_end_non_contiguous:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 144
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 4, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_end_into_end:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_middle:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
  ret <8 x i8> %res
}

define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %res
}

define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_slidedown:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 60
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
  ret <8 x i8> %res
}

; This should slide %v down by 2 and %w up by 1 before merging them
define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, -22
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
  ret <8 x i8> %res
}

; This shouldn't generate a vmerge because the elements of %w are not consecutive
define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    li a0, 84
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vrgather.vv v8, v9, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
  ret <8 x i8> %res
}

; Make sure we use a vmv.v.i to load the mask constant.
define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: shuffle_v8i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 13
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %s
}

; FIXME: This could be expressed as a vrgather.vv
define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
; CHECK-LABEL: shuffle_v64i8_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    ret
  %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
  ret <8 x i8> %s
}

define <8 x i8> @shuffle_compress_singlesrc_e8(<8 x i8> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 181
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vcompress.vm v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 poison, i32 poison, i32 poison>
  ret <8 x i8> %out
}

define <8 x i16> @shuffle_compress_singlesrc_e16(<8 x i16> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 181
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vcompress.vm v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 poison, i32 poison, i32 poison>
  ret <8 x i16> %out
}

define <8 x i32> @shuffle_compress_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 28
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v8, v8, 2, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison>
  ret <8 x i32> %out
}

define <8 x i64> @shuffle_compress_singlesrc_e64(<8 x i64> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 181
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vcompress.vm v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 poison, i32 poison, i32 poison>
  ret <8 x i64> %out
}

define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI53_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI53_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 poison, i32 5, i32 7, i32 poison, i32 poison>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread2_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread2_singlesrc_e32_index1(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsll.vx v8, v10, a0
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread2_singlesrc_e32_index2(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    vsrl.vi v9, v9, 1
; CHECK-NEXT:    vadd.vi v9, v9, -1
; CHECK-NEXT:    vslidedown.vx v10, v9, a0
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v11, v8, v10
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread3_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread3_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vslide1down.vx v9, v9, a0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    srli a0, a0, 3
; CHECK-NEXT:    vslidedown.vx v10, v9, a0
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v11, v8, v10
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 2, i32 poison>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv2r.v v10, v8
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison>
  ret <8 x i32> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
  ret <16 x i8> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 8
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison>
  ret <16 x i8> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 3, i32 poison>
  ret <16 x i8> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx3(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 24
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 3>
  ret <16 x i8> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx4(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vsrl.vi v9, v9, 2
; CHECK-NEXT:    vadd.vi v10, v9, -1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison>
  ret <16 x i8> %out
}

define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) {
; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf8 v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <32 x i8> %out
}

define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI65_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI65_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 2, i32 poison, i32 3, i32 4, i32 poison>
  ret <8 x i32> %out
}

define <8 x i8> @shuffle_decompress_singlesrc_e8(<8 x i8> %v) {
; CHECK-LABEL: shuffle_decompress_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vslideup.vi v9, v8, 3
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 2, i32 3, i32 4>
  ret <8 x i8> %out
}

define <8 x i32> @shuffle_repeat2_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v8
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat3_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 7
; CHECK-NEXT:    vmv.v.i v9, 1
; CHECK-NEXT:    li a0, 192
; CHECK-NEXT:    vmerge.vim v9, v9, 0, v0
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    vslidedown.vx v10, v9, a0
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v11, v8, v10
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_repeat4_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat4_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    vsrl.vi v9, v9, 2
; CHECK-NEXT:    vslidedown.vx v10, v9, a0
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v11, v8, v10
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %out
}

define <16 x i32> @shuffle_disjoint_lanes(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI70_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI70_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle8.v v18, (a0)
; CHECK-NEXT:    lui a0, 11
; CHECK-NEXT:    addi a0, a0, -1366
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v16, v18
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
  ret <16 x i32> %out
}

define <16 x i32> @shuffle_disjoint_lanes_one_identity(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_identity:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI71_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI71_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    li a0, -272
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  ret <16 x i32> %out
}

define <16 x i32> @shuffle_disjoint_lanes_one_broadcast(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI72_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI72_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v20, (a0)
; CHECK-NEXT:    lui a0, 15
; CHECK-NEXT:    addi a0, a0, 240
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v16, v8, 7
; CHECK-NEXT:    vrgatherei16.vv v16, v12, v20, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 16, i32 17, i32 18, i32 19, i32 7, i32 7, i32 7, i32 7, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i32> %out
}

define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI73_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI73_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    lui a1, 15
; CHECK-NEXT:    addi a1, a1, 240
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <16 x i32> poison, i32 %v, i32 0
  %splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer
  %out = shufflevector <16 x i32> %splat, <16 x i32> %w, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 16, i32 17, i32 18, i32 19, i32 0, i32 0, i32 0, i32 0, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i32> %out
}

define <4 x i128> @shuffle_i128(<4 x i128> %a) {
; RV32-LABEL: shuffle_i128:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    lw a2, 60(a1)
; RV32-NEXT:    sw a2, 60(sp)
; RV32-NEXT:    lw a2, 56(a1)
; RV32-NEXT:    sw a2, 56(sp)
; RV32-NEXT:    lw a2, 52(a1)
; RV32-NEXT:    sw a2, 52(sp)
; RV32-NEXT:    lw a2, 48(a1)
; RV32-NEXT:    sw a2, 48(sp)
; RV32-NEXT:    lw a2, 44(a1)
; RV32-NEXT:    sw a2, 44(sp)
; RV32-NEXT:    lw a2, 40(a1)
; RV32-NEXT:    sw a2, 40(sp)
; RV32-NEXT:    lw a2, 36(a1)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lw a2, 32(a1)
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    lw a2, 12(a1)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(sp)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    mv a2, sp
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    lui a1, %hi(.LCPI74_0)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI74_0)
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vle32.v v8, (a2)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vle16.v v16, (a1)
; RV32-NEXT:    vrgatherei16.vv v12, v8, v16
; RV32-NEXT:    vse64.v v12, (a0)
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    .cfi_def_cfa sp, 128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i128:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    ld a2, 56(a1)
; RV64-NEXT:    sd a2, 56(sp)
; RV64-NEXT:    ld a2, 48(a1)
; RV64-NEXT:    sd a2, 48(sp)
; RV64-NEXT:    ld a2, 40(a1)
; RV64-NEXT:    sd a2, 40(sp)
; RV64-NEXT:    ld a2, 32(a1)
; RV64-NEXT:    sd a2, 32(sp)
; RV64-NEXT:    ld a2, 8(a1)
; RV64-NEXT:    sd a2, 8(sp)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    sd a1, 0(sp)
; RV64-NEXT:    lui a1, %hi(.LCPI74_0)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI74_0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vle64.v v8, (a2)
; RV64-NEXT:    vle16.v v16, (a1)
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16
; RV64-NEXT:    vse64.v v12, (a0)
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    .cfi_def_cfa sp, 128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 2, i32 3, i32 0>
  ret <4 x i128> %res
}

define void @shuffle_i128_ldst(ptr %p) {
; CHECK-LABEL: shuffle_i128_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI75_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI75_0)
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vse64.v v12, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
  store <4 x i128> %res, ptr %p
  ret void
}

define void @shuffle_i256_ldst(ptr %p) {
; CHECK-LABEL: shuffle_i256_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI76_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI76_0)
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle8.v v16, (a1)
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsext.vf2 v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v24
; CHECK-NEXT:    vse64.v v16, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  store <4 x i256> %res, ptr %p
  ret void
}

define void @shuffle_i64_splat(ptr %p) nounwind {
; RV32-LABEL: shuffle_i64_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i64_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %p
  %res = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i64> %res, ptr %p
  ret void
}

define void @shuffle_i128_splat(ptr %p) nounwind {
; CHECK-LABEL: shuffle_i128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v9
; CHECK-NEXT:    vmv.v.v v13, v12
; CHECK-NEXT:    vmv.v.v v14, v12
; CHECK-NEXT:    vmv.v.v v15, v12
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vse64.v v12, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i128> %res, ptr %p
  ret void
}

define void @shuffle_i256_splat(ptr %p) nounwind {
; RV32-LABEL: shuffle_i256_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 12320
; RV32-NEXT:    addi a1, a1, 256
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v16, v8, v24
; RV32-NEXT:    vse64.v v16, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i256_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 98305
; RV64-NEXT:    slli a1, a1, 5
; RV64-NEXT:    addi a1, a1, 1
; RV64-NEXT:    slli a1, a1, 16
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v24, a1
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v16, v8, v24
; RV64-NEXT:    vse64.v v16, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i256> %res, ptr %p
  ret void
}

define <16 x i32> @shuffle_m1_prefix(<16 x i32> %a) {
; CHECK-LABEL: shuffle_m1_prefix:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 12
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vslidedown.vi v12, v8, 2
; CHECK-NEXT:    vslideup.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <16 x i32> %out
}

define <16 x i32> @shuffle_m2_prefix(<16 x i32> %a) {
; CHECK-LABEL: shuffle_m2_prefix:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI81_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI81_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v14, (a0)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v14
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 5, i32 2, i32 6, i32 0, i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <16 x i32> %out
}

define <4 x i16> @vmerge_1(<4 x i16> %x) {
; CHECK-LABEL: vmerge_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 poison, i16 5, i16 5, i16 poison>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

define <4 x i16> @vmerge_2(<4 x i16> %x) {
; CHECK-LABEL: vmerge_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 9
; CHECK-NEXT:    vmv.v.i v9, 5
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 poison, i16 poison, i16 5>, <4 x i32> <i32 0, i32 4, i32 7, i32 3>
  ret <4 x i16> %s
}

define <4 x i16> @vmerge_3(<4 x i16> %x) {
; CHECK-LABEL: vmerge_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

define <8 x i64> @shuffle_v8i164_span_splat(<8 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_v8i164_span_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 1
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v9
; CHECK-NEXT:    vmv.v.v v13, v12
; CHECK-NEXT:    vmv.v.v v14, v12
; CHECK-NEXT:    vmv.v.v v15, v12
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
  ret <8 x i64> %res
}

; Doing this as a span splat requires rewriting the poison elements in the mask
; not just using a prefix of the mask.
define <8 x i64> @shuffle_v8i64_span_splat_neg(<8 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_v8i64_span_splat_neg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 1
; CHECK-NEXT:    srli a0, a0, 3
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v10, v9, a0
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v13, v8, v10
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v10, v10, a0
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v9
; CHECK-NEXT:    vrgatherei16.vv v14, v8, v10
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v9, v10, a0
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v15, v8, v9
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 1, i32 poison, i32 1, i32 0, i32 poison, i32 0, i32 1, i32 0>
  ret <8 x i64> %res
}

; Doing this as a locally repeating shuffle requires rewriting the poison
; elements in the mask not just using a prefix of the mask.
define <8 x i32> @shuffle_v8i32_locally_repeating_neg(<8 x i32> %a) {
; CHECK-LABEL: shuffle_v8i32_locally_repeating_neg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI87_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI87_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 3, i32 2, i32 poison, i32 0, i32 3, i32 2>
  ret <8 x i32> %res
}

define <8 x i8> @identity_splat0(<8 x i8> %v) {
; CHECK-LABEL: identity_splat0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 25
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vrgather.vi v9, v8, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuf = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuf
}

define <8 x i8> @identity_splat2(<8 x i8> %v) {
; CHECK-LABEL: identity_splat2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 28
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vrgather.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuf = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuf
}

define <8 x i8> @vmerge_vxm(<8 x i8> %v, i8 %s) {
; CHECK-LABEL: vmerge_vxm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 25
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT:    ret
  %ins = insertelement <8 x i8> %v, i8 %s, i32 0
  %shuf = shufflevector <8 x i8> %ins, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuf
}

define <8 x i8> @vmerge_vxm2(<8 x i8> %v, i8 %s) {
; CHECK-LABEL: vmerge_vxm2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 25
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT:    ret
  %ins = insertelement <8 x i8> %v, i8 %s, i32 0
  %shuf = shufflevector <8 x i8> %v, <8 x i8> %ins, <8 x i32> <i32 8, i32 1, i32 2, i32 8, i32 8, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuf
}

define <8 x i8> @vmerge_vxm3(<8 x i8> %v, i8 %s) {
; CHECK-LABEL: vmerge_vxm3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 25
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT:    ret
  %ins = insertelement <8 x i8> %v, i8 %s, i32 0
  %splat = shufflevector <8 x i8> %ins, <8 x i8> poison, <8 x i32> zeroinitializer
  %shuf = shufflevector <8 x i8> %v, <8 x i8> %splat, <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuf
}