4 files changed, 236 insertions, 0 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 117eb33..900f9dd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1697,6 +1697,31 @@ multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruct
   }
 }
 
+multiclass VPatNarrowShiftExtVL_WV<SDNode op, PatFrags extop, string instruction_name> {
+  foreach vtiToWti = AllWidenableIntVectors in {
+    defvar vti = vtiToWti.Vti; // narrow (result / shift-amount) type
+    defvar wti = vtiToWti.Wti; // wide (shifted source) type
+    let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                                 GetVTypePredicates<wti>.Predicates) in
+    def : Pat< // masked trunc(op(wide, extop(narrow))) -> <instruction_name>_WV_<LMUL>_MASK
+      (vti.Vector
+        (riscv_trunc_vector_vl
+          (op (wti.Vector wti.RegClass:$rs2),
+              (wti.Vector (extop (vti.Vector vti.RegClass:$rs1),
+                                 (vti.Mask true_mask), VLOpFrag)),
+          srcvalue, (vti.Mask true_mask), VLOpFrag),
+        (vti.Mask V0), VLOpFrag)),
+      (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_MASK")
+        (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1,
+        (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; // passthru is IMPLICIT_DEF: nothing to preserve
+  }
+}
+
+multiclass VPatNarrowShiftVL_WV<SDNode op, string instruction_name> {
+  defm : VPatNarrowShiftExtVL_WV<op, riscv_sext_vl_oneuse, instruction_name>; // shift amount widened by sext
+  defm : VPatNarrowShiftExtVL_WV<op, riscv_zext_vl_oneuse, instruction_name>; // shift amount widened by zext
+}
+
 multiclass VPatMultiplyAddVL_VV_VX<SDNode op, string instruction_name> {
   foreach vti = AllIntegerVectors in {
     defvar suffix = vti.LMul.MX;
@@ -2121,6 +2146,9 @@ defm : VPatNarrowShiftSplatExt_WX<riscv_sra_vl, riscv_zext_vl_oneuse, "PseudoVNS
 defm : VPatNarrowShiftSplatExt_WX<riscv_srl_vl, riscv_sext_vl_oneuse, "PseudoVNSRL">;
 defm : VPatNarrowShiftSplatExt_WX<riscv_srl_vl, riscv_zext_vl_oneuse, "PseudoVNSRL">;
 
+defm : VPatNarrowShiftVL_WV<riscv_srl_vl, "PseudoVNSRL">; // logical right shift by extended vector amount
+defm : VPatNarrowShiftVL_WV<riscv_sra_vl, "PseudoVNSRA">; // arithmetic right shift by extended vector amount
+
 defm : VPatBinaryNVL_WV_WX_WI<riscv_vnsrl_vl, "PseudoVNSRL">;
 
 foreach vtiTowti = AllWidenableIntVectors in {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
index d18f76b..89038c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
@@ -158,6 +158,30 @@ define <2 x i32> @vnsra_v2i64_v2i32_imm(<2 x i64> %x) {
   ret <2 x i32> %b
 }
 
+define <8 x i8> @vnsra_v8i16_v8i8_sext(<8 x i16> %x, <8 x i8> %y) {
+; CHECK-LABEL: vnsra_v8i16_v8i8_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %amt = sext <8 x i8> %y to <8 x i16> ; widen the per-element shift amount
+  %shifted = ashr <8 x i16> %x, %amt
+  %narrow = trunc <8 x i16> %shifted to <8 x i8> ; narrow back to the element type of %y
+  ret <8 x i8> %narrow
+}
+
+define <8 x i8> @vnsra_v8i16_v8i8_zext(<8 x i16> %x, <8 x i8> %y) {
+; CHECK-LABEL: vnsra_v8i16_v8i8_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %zext = zext <8 x i8> %y to <8 x i16> ; zero-extended shift amount (value was misnamed %sext)
+  %a = ashr <8 x i16> %x, %zext
+  %b = trunc <8 x i16> %a to <8 x i8>
+  ret <8 x i8> %b
+}
+
 define <8 x i8> @vnsrl_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
 ; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar:
 ; CHECK:       # %bb.0:
@@ -313,3 +337,51 @@ define <2 x i32> @vnsrl_v2i64_v2i32_imm(<2 x i64> %x) {
   %b = trunc <2 x i64> %a to <2 x i32>
   ret <2 x i32> %b
 }
+
+define <4 x i16> @vnsrl_v4i32_v4i16_sext(<4 x i32> %x, <4 x i16> %y) {
+; CHECK-LABEL: vnsrl_v4i32_v4i16_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %amt = sext <4 x i16> %y to <4 x i32> ; widen the per-element shift amount
+  %shifted = lshr <4 x i32> %x, %amt
+  %narrow = trunc <4 x i32> %shifted to <4 x i16> ; narrow back to the element type of %y
+  ret <4 x i16> %narrow
+}
+
+define <4 x i16> @vnsrl_v4i32_v4i16_zext(<4 x i32> %x, <4 x i16> %y) {
+; CHECK-LABEL: vnsrl_v4i32_v4i16_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %amt = zext <4 x i16> %y to <4 x i32> ; widen the per-element shift amount
+  %shifted = lshr <4 x i32> %x, %amt
+  %narrow = trunc <4 x i32> %shifted to <4 x i16> ; narrow back to the element type of %y
+  ret <4 x i16> %narrow
+}
+
+define <2 x i32> @vnsrl_v2i64_v2i32_sext(<2 x i64> %x, <2 x i32> %y) {
+; CHECK-LABEL: vnsrl_v2i64_v2i32_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %amt = sext <2 x i32> %y to <2 x i64> ; widen the per-element shift amount
+  %shifted = lshr <2 x i64> %x, %amt
+  %narrow = trunc <2 x i64> %shifted to <2 x i32> ; narrow back to the element type of %y
+  ret <2 x i32> %narrow
+}
+
+define <2 x i32> @vnsrl_v2i64_v2i32_zext(<2 x i64> %x, <2 x i32> %y) {
+; CHECK-LABEL: vnsrl_v2i64_v2i32_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %amt = zext <2 x i32> %y to <2 x i64> ; widen the per-element shift amount
+  %shifted = lshr <2 x i64> %x, %amt
+  %narrow = trunc <2 x i64> %shifted to <2 x i32> ; narrow back to the element type of %y
+  ret <2 x i32> %narrow
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll
new file mode 100644
index 0000000..18f7434
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %one = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %truemask = shufflevector <vscale x 1 x i1> %one, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %amt = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %truemask, i32 %evl)
+  %shift = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %amt, <vscale x 1 x i1> %truemask, i32 %evl)
+  %res = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %shift, <vscale x 1 x i1> %m, i32 %evl) ; only the trunc uses %m
+  ret <vscale x 1 x i16> %res
+}
+
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %one = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %truemask = shufflevector <vscale x 1 x i1> %one, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %amt = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %truemask, i32 %evl)
+  %shift = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %amt, <vscale x 1 x i1> %truemask, i32 %evl)
+  %res = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %shift, <vscale x 1 x i1> %truemask, i32 %evl) ; all-ones mask on the trunc too
+  ret <vscale x 1 x i16> %res
+}
+
+declare <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %one = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %truemask = shufflevector <vscale x 1 x i1> %one, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %amt = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %truemask, i32 %evl)
+  %shift = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %amt, <vscale x 1 x i1> %truemask, i32 %evl)
+  %res = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %shift, <vscale x 1 x i1> %m, i32 %evl) ; only the trunc uses %m
+  ret <vscale x 1 x i32> %res
+}
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %allones, i32 %evl) ; all-ones mask: no mask argument needed
+  ret <vscale x 1 x i32> %vr
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll
new file mode 100644
index 0000000..059d5bf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsrl_vv_nxv1i16(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsrl_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl) ; only the trunc uses %m
+  ret <vscale x 1 x i16> %vr
+}
+
+
+define <vscale x 1 x i16> @vsrl_vv_nxv1i16_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsrl_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %allones, i32 %evl) ; all-ones mask on the trunc too
+  ret <vscale x 1 x i16> %vr
+}
+
+declare <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsrl_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsrl_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl) ; only the trunc uses %m
+  ret <vscale x 1 x i32> %vr
+}
+
+define <vscale x 1 x i32> @vsrl_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsrl_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %allones, i32 %evl) ; all-ones mask: no mask argument needed
+  ret <vscale x 1 x i32> %vr
+}