author    Philip Reames <preames@rivosinc.com>    2024-01-24 17:10:59 -0800
committer Philip Reames <listmail@philipreames.com>    2024-01-24 17:10:59 -0800
commit    795090739cf3b295be750dfba0af2ba993e60cdd (patch)
tree      aa4e0d985022bef6f01b960659cdc6e1622133f1
parent    7386aa02efafc260ad8b2d0c9ca19d39e16cfd07 (diff)
[RISCV] Fix a bug accidentally introduced in e9311f9
If we're lowering an e8 m8 shuffle and we have an index value greater than 255, we have no available space to generate an e16 index vector. The code had originally handled this correctly, but in a recent refactoring I had moved the single source code above the check, and thus broke the single source case by accident. I have a change up for review to rework this (https://github.com/llvm/llvm-project/pull/79330), but for now, go with the most obvious fix.
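To make the constraint concrete, here is a small standalone sketch of the arithmetic behind the guard (illustrative only; the names and the printf are made up for this note, not taken from LLVM): vrgather.vv indices share the data's element width, so e8 indices can only name elements 0 through 255, and switching to e16 indices doubles the index vector's LMUL, which at m8 has nowhere to go.

// Illustrative sketch only (hypothetical names, not LLVM's API): why an
// e8 shuffle across more than 256 elements can't use a single vrgather.vv.
#include <cstdio>

int main() {
  const unsigned EltBits = 8;     // e8: the shuffle's element width
  const unsigned NumElems = 512;  // e.g. the <512 x i8> vectors in the tests
  // vrgather.vv indices use the same SEW as the data, so the largest
  // element an i8 index can address is 255.
  const unsigned MaxSameWidthIndex = (1u << EltBits) - 1;
  if (NumElems - 1 > MaxSameWidthIndex) {
    // vrgatherei16.vv would fix the index range, but doubling the index
    // element width doubles the index vector's LMUL: at data LMUL=8 the
    // indices would need LMUL=16, which RVV does not provide.
    printf("cannot address element %u with e8 indices\n", NumElems - 1);
  }
  return 0;
}

With the guard in place the shuffle is simply not custom-lowered and falls back to generic expansion through the stack, which is what the updated CHECK lines below show (a vse8.v of the source to a stack slot, then lbu loads of the individual out-of-range elements).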
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp                      |   8
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll  | 134
2 files changed, 95 insertions, 47 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dc1c87f..6ab1c82 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4936,6 +4936,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
     return V;
 
+  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
+    // On such a large vector we're unable to use i8 as the index type.
+    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
+    // may involve vector splitting if we're already at LMUL=8, or our
+    // user-supplied maximum fixed-length LMUL.
+    return SDValue();
+  }
+
   // Base case for the two operand recursion below - handle the worst case
   // single source shuffle.
   unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
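As context for the FIXME above, a rough sketch of the vrgatherei16 direction it mentions (an assumption about shape only, not the code from https://github.com/llvm/llvm-project/pull/79330; ContainerVT, V1, TrueMask, and VL mirror locals of this function, while WideGatherIndices is hypothetical):

// Hypothetical sketch of the FIXME's idea: widen the indices to i16 and
// emit the ei16 gather. At e8 m8 the i16 index vector would need
// LMUL=16, so the data would first have to be split, which is exactly
// the complication the FIXME calls out.
MVT IndexVT = ContainerVT.changeVectorElementType(MVT::i16);
SDValue Indices = DAG.getBuildVector(IndexVT, DL, WideGatherIndices);
SDValue Gather =
    DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ContainerVT, V1, Indices,
                DAG.getUNDEF(ContainerVT), TrueMask, VL);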
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
index 48593b3..ad99015 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
@@ -7,31 +7,50 @@
define <512 x i8> @single_source(<512 x i8> %a) {
; CHECK-LABEL: single_source:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -1536
+; CHECK-NEXT: .cfi_def_cfa_offset 1536
+; CHECK-NEXT: sd ra, 1528(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 1520(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: addi s0, sp, 1536
+; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: andi sp, sp, -512
+; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: li a0, 512
+; CHECK-NEXT: addi a1, sp, 512
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: li a1, 258
-; CHECK-NEXT: vslide1down.vx v24, v16, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 5
-; CHECK-NEXT: li a1, 432
-; CHECK-NEXT: li a2, 431
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v24, v16, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 4
-; CHECK-NEXT: li a1, 466
-; CHECK-NEXT: li a2, 465
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v24, v16, a2
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: lbu a0, 770(sp)
+; CHECK-NEXT: vmv.x.s a1, v16
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v17, v16, 5
+; CHECK-NEXT: vmv.x.s a0, v17
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: li a0, 432
+; CHECK-NEXT: li a1, 431
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v24, a1
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v16, v16, 4
+; CHECK-NEXT: vmv.x.s a0, v16
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: li a0, 466
+; CHECK-NEXT: li a1, 465
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: lbu a0, 1012(sp)
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: li a0, 501
; CHECK-NEXT: li a1, 500
-; CHECK-NEXT: vmv.s.x v16, a1
-; CHECK-NEXT: li a2, 501
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v24, v16, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: addi sp, s0, -1536
+; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 1520(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 1536
; CHECK-NEXT: ret
%res = shufflevector <512 x i8> %a, <512 x i8> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 500, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 258>
ret <512 x i8> %res
@@ -41,32 +60,53 @@ define <512 x i8> @single_source(<512 x i8> %a) {
define <512 x i8> @range_restriction(<512 x i8> %a) {
; CHECK-LABEL: range_restriction:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -1536
+; CHECK-NEXT: .cfi_def_cfa_offset 1536
+; CHECK-NEXT: sd ra, 1528(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 1520(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: addi s0, sp, 1536
+; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: andi sp, sp, -512
+; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: li a0, 512
+; CHECK-NEXT: addi a1, sp, 512
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: li a1, 254
-; CHECK-NEXT: vslide1down.vx v24, v16, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 5
-; CHECK-NEXT: li a1, 432
-; CHECK-NEXT: li a2, 431
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v24, v16, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 4
-; CHECK-NEXT: li a1, 466
-; CHECK-NEXT: li a2, 465
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v24, v16, a2
-; CHECK-NEXT: li a1, 44
-; CHECK-NEXT: vmv.s.x v16, a1
-; CHECK-NEXT: li a1, 501
-; CHECK-NEXT: li a2, 500
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v24, v16, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: lbu a0, 766(sp)
+; CHECK-NEXT: vmv.x.s a1, v16
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v17, v16, 5
+; CHECK-NEXT: vmv.x.s a0, v17
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: li a0, 432
+; CHECK-NEXT: li a1, 431
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v24, a1
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v17, v16, 4
+; CHECK-NEXT: vmv.x.s a0, v17
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: li a0, 466
+; CHECK-NEXT: li a1, 465
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v24, a1
+; CHECK-NEXT: li a0, 44
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a0
+; CHECK-NEXT: vmv.x.s a0, v16
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: li a0, 501
+; CHECK-NEXT: li a1, 500
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: addi sp, s0, -1536
+; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 1520(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 1536
; CHECK-NEXT: ret
%res = shufflevector <512 x i8> %a, <512 x i8> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 44, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 254>
ret <512 x i8> %res
@@ -137,9 +177,9 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) {
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: vmv.s.x v16, a1
; CHECK-NEXT: li a0, 478
+; CHECK-NEXT: li a1, 477
; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
; CHECK-NEXT: lbu a0, 674(sp)
-; CHECK-NEXT: li a1, 477
; CHECK-NEXT: vslideup.vx v8, v16, a1
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: li a0, 490