Diffstat (limited to 'llvm/test/CodeGen/RISCV')
54 files changed, 7757 insertions, 15810 deletions
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 3fcaa81..3225120 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -1302,14 +1302,14 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
 ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; RV32I-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.agg.result)
 ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; RV32I-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C4]](s32)
- ; RV32I-NEXT: G_STORE [[C1]](s32), %3(p0) :: (store (s32) into %ir.b)
+ ; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C4]](s32)
+ ; RV32I-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.b)
 ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; RV32I-NEXT: %6:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C5]](s32)
- ; RV32I-NEXT: G_STORE [[C2]](s32), %6(p0) :: (store (s32) into %ir.c)
+ ; RV32I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C5]](s32)
+ ; RV32I-NEXT: G_STORE [[C2]](s32), [[PTR_ADD1]](p0) :: (store (s32) into %ir.c)
 ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; RV32I-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C6]](s32)
- ; RV32I-NEXT: G_STORE [[C3]](s32), %9(p0) :: (store (s32) into %ir.d)
+ ; RV32I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C6]](s32)
+ ; RV32I-NEXT: G_STORE [[C3]](s32), [[PTR_ADD2]](p0) :: (store (s32) into %ir.d)
 ; RV32I-NEXT: PseudoRET
 store i32 1, ptr %agg.result, align 4
 %b = getelementptr inbounds %struct.large, ptr %agg.result, i32 0, i32 1
@@ -1331,8 +1331,8 @@ define i32 @caller_large_struct_ret() nounwind {
 ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; ILP32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.1)
 ; ILP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; ILP32-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
- ; ILP32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %3(p0) :: (dereferenceable load (s32) from %ir.3)
+ ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
+ ; ILP32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.3)
 ; ILP32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
 ; ILP32-NEXT: $x10 = COPY [[ADD]](s32)
 ; ILP32-NEXT: PseudoRET implicit $x10
@@ -1346,8 +1346,8 @@ define i32 @caller_large_struct_ret() nounwind {
 ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; ILP32F-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.1)
 ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; ILP32F-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
- ; ILP32F-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %3(p0) :: (dereferenceable load (s32) from %ir.3)
+ ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
+ ; ILP32F-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.3)
 ; ILP32F-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
 ; ILP32F-NEXT: $x10 = COPY [[ADD]](s32)
 ; ILP32F-NEXT: PseudoRET implicit $x10
@@ -1361,8 +1361,8 @@ define i32 @caller_large_struct_ret() nounwind {
 ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; ILP32D-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.1)
 ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; ILP32D-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
- ; ILP32D-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %3(p0) :: (dereferenceable load (s32) from %ir.3)
+ ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
+ ; ILP32D-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.3)
 ; ILP32D-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
 ; ILP32D-NEXT: $x10 = COPY [[ADD]](s32)
 ; ILP32D-NEXT: PseudoRET implicit $x10
@@ -1392,13 +1392,13 @@ define %struct.large2 @callee_large_struct_ret2() nounwind {
 ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; RV32I-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32), align 8)
 ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s32)
+ ; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s32)
 ; RV32I-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
 ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; RV32I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s32)
+ ; RV32I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s32)
 ; RV32I-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16), align 8)
 ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; RV32I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s32)
+ ; RV32I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s32)
 ; RV32I-NEXT: G_STORE [[C3]](s32), [[PTR_ADD2]](p0) :: (store (s32))
 ; RV32I-NEXT: PseudoRET
 %a = insertvalue %struct.large2 poison, i32 1, 0
@@ -1418,13 +1418,13 @@ define i32 @caller_large_struct_ret2() nounwind {
 ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; ILP32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %stack.0, align 8)
 ; ILP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
+ ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
 ; ILP32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %stack.0)
 ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; ILP32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
+ ; ILP32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
 ; ILP32-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from %stack.0, align 8)
 ; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; ILP32-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
+ ; ILP32-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
 ; ILP32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %stack.0)
 ; ILP32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD3]]
 ; ILP32-NEXT: $x10 = COPY [[ADD]](s32)
@@ -1439,13 +1439,13 @@ define i32 @caller_large_struct_ret2() nounwind {
 ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; ILP32F-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %stack.0, align 8)
 ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
+ ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
 ; ILP32F-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %stack.0)
 ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; ILP32F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
+ ; ILP32F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
 ; ILP32F-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from %stack.0, align 8)
 ; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; ILP32F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
+ ; ILP32F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
 ; ILP32F-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %stack.0)
 ; ILP32F-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD3]]
 ; ILP32F-NEXT: $x10 = COPY [[ADD]](s32)
@@ -1460,13 +1460,13 @@ define i32 @caller_large_struct_ret2() nounwind {
 ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; ILP32D-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %stack.0, align 8)
 ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
+ ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
 ; ILP32D-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %stack.0)
 ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; ILP32D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
+ ; ILP32D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
 ; ILP32D-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from %stack.0, align 8)
 ; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; ILP32D-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
+ ; ILP32D-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
 ; ILP32D-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %stack.0)
 ; ILP32D-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD3]]
 ; ILP32D-NEXT: $x10 = COPY [[ADD]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index 17c6e55..a297358 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -1075,14 +1075,14 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
 ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; RV64I-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.agg.result, align 4)
 ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; RV64I-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C4]](s64)
- ; RV64I-NEXT: G_STORE [[C1]](s64), %3(p0) :: (store (s64) into %ir.b, align 4)
+ ; RV64I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; RV64I-NEXT: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.b, align 4)
 ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; RV64I-NEXT: %6:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C5]](s64)
- ; RV64I-NEXT: G_STORE [[C2]](s64), %6(p0) :: (store (s64) into %ir.c, align 4)
+ ; RV64I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; RV64I-NEXT: G_STORE [[C2]](s64), [[PTR_ADD1]](p0) :: (store (s64) into %ir.c, align 4)
 ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
- ; RV64I-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C6]](s64)
- ; RV64I-NEXT: G_STORE [[C3]](s64), %9(p0) :: (store (s64) into %ir.d, align 4)
+ ; RV64I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C6]](s64)
+ ; RV64I-NEXT: G_STORE [[C3]](s64), [[PTR_ADD2]](p0) :: (store (s64) into %ir.d, align 4)
 ; RV64I-NEXT: PseudoRET
 store i64 1, ptr %agg.result, align 4
 %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1
@@ -1104,8 +1104,8 @@ define i64 @caller_large_struct_ret() nounwind {
 ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; LP64-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.1)
 ; LP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
- ; LP64-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
- ; LP64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (dereferenceable load (s64) from %ir.3)
+ ; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; LP64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.3)
 ; LP64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
 ; LP64-NEXT: $x10 = COPY [[ADD]](s64)
 ; LP64-NEXT: PseudoRET implicit $x10
@@ -1119,8 +1119,8 @@ define i64 @caller_large_struct_ret() nounwind {
 ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; LP64F-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.1)
 ; LP64F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
- ; LP64F-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
- ; LP64F-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (dereferenceable load (s64) from %ir.3)
+ ; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; LP64F-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.3)
 ; LP64F-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
 ; LP64F-NEXT: $x10 = COPY [[ADD]](s64)
 ; LP64F-NEXT: PseudoRET implicit $x10
@@ -1134,8 +1134,8 @@ define i64 @caller_large_struct_ret() nounwind {
 ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; LP64D-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.1)
 ; LP64D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
- ; LP64D-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
- ; LP64D-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (dereferenceable load (s64) from %ir.3)
+ ; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; LP64D-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.3)
 ; LP64D-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
 ; LP64D-NEXT: $x10 = COPY [[ADD]](s64)
 ; LP64D-NEXT: PseudoRET implicit $x10
@@ -1165,13 +1165,13 @@ define %struct.large2 @callee_large_struct_ret2() nounwind {
 ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; RV64I-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16)
 ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; RV64I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; RV64I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64)
 ; RV64I-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s128))
 ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; RV64I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; RV64I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
 ; RV64I-NEXT: G_STORE [[C2]](s64), [[PTR_ADD1]](p0) :: (store (s64), align 16)
 ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
- ; RV64I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+ ; RV64I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64)
 ; RV64I-NEXT: G_STORE [[C3]](s64), [[PTR_ADD2]](p0) :: (store (s64))
 ; RV64I-NEXT: PseudoRET
 %a = insertvalue %struct.large2 poison, i64 1, 0
@@ -1191,13 +1191,13 @@ define i64 @caller_large_struct_ret2() nounwind {
 ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; LP64-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %stack.0, align 16)
 ; LP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
 ; LP64-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %stack.0)
 ; LP64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; LP64-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+ ; LP64-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
 ; LP64-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %stack.0, align 16)
 ; LP64-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
- ; LP64-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
+ ; LP64-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
 ; LP64-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from %stack.0)
 ; LP64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD3]]
 ; LP64-NEXT: $x10 = COPY [[ADD]](s64)
@@ -1212,13 +1212,13 @@ define i64 @caller_large_struct_ret2() nounwind {
 ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; LP64F-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %stack.0, align 16)
 ; LP64F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
 ; LP64F-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %stack.0)
 ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; LP64F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+ ; LP64F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
 ; LP64F-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %stack.0, align 16)
 ; LP64F-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
- ; LP64F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
+ ; LP64F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
 ; LP64F-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from %stack.0)
 ; LP64F-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD3]]
 ; LP64F-NEXT: $x10 = COPY [[ADD]](s64)
@@ -1233,13 +1233,13 @@ define i64 @caller_large_struct_ret2() nounwind {
 ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
 ; LP64D-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %stack.0, align 16)
 ; LP64D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
 ; LP64D-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %stack.0)
 ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; LP64D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+ ; LP64D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
 ; LP64D-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %stack.0, align 16)
 ; LP64D-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
- ; LP64D-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
+ ; LP64D-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
 ; LP64D-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from %stack.0)
 ; LP64D-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD3]]
 ; LP64D-NEXT: $x10 = COPY [[ADD]](s64)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll
index 3b12ad5..e985d1f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll
@@ -67,7 +67,7 @@ define i32 @va1(ptr %fmt, ...) {
 ; RV32-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.va)
 ; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va)
 ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; RV32-NEXT: %20:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s32)
+ ; RV32-NEXT: %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
 ; RV32-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV32-NEXT: $x10 = COPY [[LOAD1]](s32)
@@ -105,7 +105,7 @@ define i32 @va1(ptr %fmt, ...) {
 ; RV64-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.va)
 ; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
 ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; RV64-NEXT: %20:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s64)
+ ; RV64-NEXT: %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
 ; RV64-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
@@ -687,7 +687,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
 ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; RV32-NEXT: %25:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
+ ; RV32-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
 ; RV32-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
@@ -733,7 +733,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
 ; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; RV64-NEXT: %25:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
+ ; RV64-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
 ; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
@@ -974,7 +974,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; RV32-NEXT: %24:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
+ ; RV32-NEXT: %24:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
 ; RV32-NEXT: G_STORE %24(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
@@ -1020,7 +1020,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
 ; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
 ; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; RV64-NEXT: %25:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
+ ; RV64-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
 ; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
@@ -1724,7 +1724,7 @@ define i32 @va_large_stack(ptr %fmt, ...) {
 ; RV32-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s32) into %ir.va)
 ; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va)
 ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; RV32-NEXT: %21:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s32)
+ ; RV32-NEXT: %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
 ; RV32-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va)
 ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV32-NEXT: $x10 = COPY [[LOAD1]](s32)
@@ -1763,7 +1763,7 @@ define i32 @va_large_stack(ptr %fmt, ...) {
 ; RV64-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.va)
 ; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
 ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; RV64-NEXT: %21:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s64)
+ ; RV64-NEXT: %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
 ; RV64-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4)
 ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
 ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-icmp-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-icmp-rv32.mir
index 8081cfb..e93f82a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-icmp-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-icmp-rv32.mir
@@ -1545,21 +1545,21 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s32)
 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12)
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32), align 8)
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from unknown-address + 4)
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s32)
 ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 8, align 8)
- ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s32)
+ ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s32)
 ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from unknown-address + 12)
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[LOAD]](s32), [[LOAD4]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[LOAD1]](s32), [[LOAD5]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir
index 93b145c..9d2b6c1 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir
@@ -147,7 +147,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
 ; CHECK-NEXT: $x10 = COPY [[LOAD]](s32)
 ; CHECK-NEXT: $x11 = COPY [[LOAD1]](s32)
@@ -159,7 +159,7 @@ body: |
 ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
 ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
 ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
 ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s32)
@@ -232,7 +232,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16)
@@ -278,15 +278,15 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s32)
 ; CHECK-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
@@ -331,7 +331,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
@@ -376,15 +376,15 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s32)
 ; CHECK-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
@@ -392,15 +392,15 @@ body: |
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s32)
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s32)
 ; CHECK-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
 ; CHECK-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
 ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
 ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
- ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+ ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
 ; CHECK-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
- ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
 ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
@@ -416,7 +416,7 @@ body: |
 ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
 ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
 ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4, align 1)
 ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
 ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir
index d85d2c5..06e84fd 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir
@@ -188,7 +188,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8)
 ; CHECK-NEXT: $x10 = COPY [[LOAD]](s64)
 ; CHECK-NEXT: $x11 = COPY [[LOAD1]](s64)
@@ -200,7 +200,7 @@ body: |
 ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
 ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8)
 ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64)
 ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s64)
@@ -273,7 +273,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s16)
@@ -320,15 +320,15 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD1]], [[C1]](s64)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s64)
@@ -377,7 +377,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
@@ -423,15 +423,15 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD1]], [[C1]](s64)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD3]], [[C1]](s64)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXTLOAD2]]
@@ -439,15 +439,15 @@ body: |
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[C3]](s64)
 ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[OR]]
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
 ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD5]], [[C1]](s64)
 ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SHL3]], [[ZEXTLOAD4]]
- ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
- ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[LOAD]], [[C1]](s64)
 ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SHL4]], [[ZEXTLOAD6]]
@@ -494,15 +494,15 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD1]], [[C1]](s64)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[LOAD]], [[C1]](s64)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXTLOAD2]]
@@ -549,15 +549,15 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD1]], [[C1]](s64)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD3]], [[C1]](s64)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXTLOAD2]]
@@ -565,15 +565,15 @@ body: |
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[C3]](s64)
 ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[OR]]
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
 ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD5]], [[C1]](s64)
 ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SHL3]], [[ZEXTLOAD4]]
- ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
- ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[LOAD]], [[C1]](s64)
 ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SHL4]], [[ZEXTLOAD6]]
@@ -582,29 +582,29 @@ body: |
 ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[OR5]], [[C5]](s64)
 ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[OR2]]
- ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
- ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
 ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD8]], [[C1]](s64)
 ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s64) = G_OR [[SHL7]], [[ZEXTLOAD7]]
- ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
- ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
 ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD10]], [[C1]](s64)
 ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s64) = G_OR [[SHL8]], [[ZEXTLOAD9]]
 ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[OR8]], [[C3]](s64)
 ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s64) = G_OR [[SHL9]], [[OR7]]
- ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+ ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
- ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
 ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s64) = G_SHL [[ZEXTLOAD12]], [[C1]](s64)
 ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s64) = G_OR [[SHL10]], [[ZEXTLOAD11]]
- ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
 ; CHECK-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
- ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD13]], [[C]](s64)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
 ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s64) = G_SHL [[LOAD1]], [[C1]](s64)
 ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s64) = G_OR [[SHL11]], [[ZEXTLOAD13]]
@@ -622,7 +622,7 @@ body: |
 ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
 ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1)
 ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8, align 1)
 ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64)
 ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s64)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir
index 5a7a042..cb5db22 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir
@@ -149,7 +149,7 @@ body: |
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x12
 ; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY2]](p0) :: (store (s32), align 8)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY2]], [[C]](s32)
 ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
 ; CHECK-NEXT: PseudoRET
 ;
@@ -161,7 +161,7 @@ body: |
 ; UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x12
 ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY2]](p0) :: (store (s32), align 8)
 ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY2]], [[C]](s32)
 ; UNALIGNED-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
 ; UNALIGNED-NEXT: PseudoRET
 %2:_(s32) = COPY $x10
@@ -239,7 +239,7 @@ body: |
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32)
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s32)
 ; CHECK-NEXT: G_STORE [[COPY2]](s16), [[COPY1]](p0) :: (store (s8))
 ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 1)
 ; CHECK-NEXT: PseudoRET
@@ -284,7 +284,7 @@ body: |
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s32)
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -292,14 +292,14 @@ body: |
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32)
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C4]](s32)
 ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY1]](p0) :: (store (s8))
 ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 1)
 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C5]](s32)
 ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C4]](s32)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s32)
 ; CHECK-NEXT: G_STORE [[TRUNC2]](s16), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2)
 ; CHECK-NEXT: G_STORE [[TRUNC3]](s16), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3)
 ; CHECK-NEXT: PseudoRET
@@ -342,7 +342,7 @@ body: |
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s32)
 ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16))
 ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 2)
 ; CHECK-NEXT: PseudoRET
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir
index 8704dde..7c1ede0 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir
@@ -268,7 +268,7 @@ body: |
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64)
 ; CHECK-NEXT: G_STORE [[COPY2]](s16), [[COPY1]](p0) :: (store (s8))
 ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 1)
 ; CHECK-NEXT: PseudoRET
@@ -315,7 +315,7 @@ body: |
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64)
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
@@ -323,7 +323,7 @@ body: |
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C3]](s64)
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s64)
 ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C5]](s64)
 ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY1]](p0) :: (store (s8))
 ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 1)
 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
@@ -331,7 +331,7 @@ body: |
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C4]]
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C6]](s64)
 ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s64)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64)
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C5]](s64)
 ; CHECK-NEXT: G_STORE [[TRUNC2]](s16), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2)
 ; CHECK-NEXT: G_STORE [[TRUNC3]](s16), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3)
 ; CHECK-NEXT: PseudoRET
@@ -381,7 +381,7 @@ body: |
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64)
 ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16))
 ; CHECK-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 2)
 ; CHECK-NEXT: PseudoRET
@@ -426,7 +426,7 @@ body: |
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s64)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64)
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
@@ -434,14 +434,14 @@ body: |
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C2]](s64)
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64)
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C4]](s64)
 ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s16))
 ; CHECK-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD1]](p0) :: (store (s16) into unknown-address + 2)
 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
 ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[C5]](s64)
 ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR2]](s64)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
[[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[TRUNC2]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 4) ; CHECK-NEXT: G_STORE [[TRUNC3]](s32), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 6) ; CHECK-NEXT: PseudoRET diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index d566069..a28b818 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -435,7 +435,7 @@ ; RV32XCVMEM: .attribute 5, "rv32i2p1_xcvmem1p0" ; RV32XCVSIMD: .attribute 5, "rv32i2p1_xcvsimd1p0" ; RV32XCVBI: .attribute 5, "rv32i2p1_xcvbi1p0" -; RV32XSFVFWMACCQQQ: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0" +; RV32XSFVFWMACCQQQ: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0" ; RV32XTHEADCMO: .attribute 5, "rv32i2p1_xtheadcmo1p0" ; RV32XTHEADCONDMOV: .attribute 5, "rv32i2p1_xtheadcondmov1p0" ; RV32XTHEADFMEMIDX: .attribute 5, "rv32i2p1_xtheadfmemidx1p0" @@ -610,7 +610,7 @@ ; RV64SVVPTC: .attribute 5, "rv64i2p1_svvptc1p0" ; RV64SVINVAL: .attribute 5, "rv64i2p1_svinval1p0" ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p1_xventanacondops1p0" -; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0" +; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0" ; RV64XTHEADBA: .attribute 5, "rv64i2p1_xtheadba1p0" ; RV64XTHEADBB: .attribute 5, "rv64i2p1_xtheadbb1p0" ; RV64XTHEADBS: .attribute 5, "rv64i2p1_xtheadbs1p0" diff --git a/llvm/test/CodeGen/RISCV/calleetypeid-directcall-mismatched.ll b/llvm/test/CodeGen/RISCV/calleetypeid-directcall-mismatched.ll new file mode 100644 index 0000000..34493ce --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calleetypeid-directcall-mismatched.ll @@ -0,0 +1,33 @@ +;; Tests that callee_type metadata attached to direct call sites is safely ignored. + +; RUN: llc --call-graph-section -mtriple riscv64 < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s +; RUN: llc --call-graph-section -mtriple riscv32 < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s + +;; Test that the `calleeTypeIds` field is not present in `callSites`. +; CHECK-LABEL: callSites: +; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] } +; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] } +; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] } +define i32 @foo(i32 %x, i32 %y) !type !0 { +entry: + ;; Call instruction with accurate callee_type. + ;; callee_type should be dropped seamlessly. + %call = call i32 @fizz(i32 %x, i32 %y), !callee_type !1 + ;; Call instruction with mismatched callee_type. + ;; callee_type should be dropped seamlessly without errors. + %call1 = call i32 @fizz(i32 %x, i32 %y), !callee_type !3 + %add = add nsw i32 %call, %call1 + ;; Call instruction with mismatched callee_type. + ;; callee_type should be dropped seamlessly without errors. 
+ %call2 = call i32 @fizz(i32 %add, i32 %y), !callee_type !3 + %sub = sub nsw i32 %add, %call2 + ret i32 %sub +} + +declare !type !2 i32 @fizz(i32, i32) + +!0 = !{i64 0, !"_ZTSFiiiiE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFiiiE.generalized"} +!3 = !{!4} +!4 = !{i64 0, !"_ZTSFicE.generalized"} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-preserve-most.ll b/llvm/test/CodeGen/RISCV/calling-conv-preserve-most.ll new file mode 100644 index 0000000..08340bb --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-preserve-most.ll @@ -0,0 +1,449 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 < %s | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 < %s | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -mattr=+e -target-abi ilp32e < %s | FileCheck %s -check-prefix=RV32E +; RUN: llc -mtriple=riscv64 -mattr=+e -target-abi lp64e < %s | FileCheck %s -check-prefix=RV64E + +; Check that the PreserveMost calling convention works. + +declare void @standard_cc_func() +declare preserve_mostcc void @preserve_mostcc_func() + +define preserve_mostcc void @preserve_mostcc1() nounwind { +; RV32I-LABEL: preserve_mostcc1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a2, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a3, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a4, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a5, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a6, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a7, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t4, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t5, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call standard_cc_func +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a2, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a3, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a4, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a7, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t4, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t5, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t6, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: ret +; +; RV64I-LABEL: preserve_mostcc1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -112 +; RV64I-NEXT: sd ra, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a2, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a3, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a4, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a5, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a6, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a7, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t4, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t5, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t6, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call standard_cc_func +; RV64I-NEXT: ld ra, 104(sp) # 8-byte Folded Reload +; 
RV64I-NEXT: ld t0, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a3, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a4, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a5, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a6, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a7, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t4, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t5, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t6, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 112 +; RV64I-NEXT: ret +; +; RV32E-LABEL: preserve_mostcc1: +; RV32E: # %bb.0: # %entry +; RV32E-NEXT: addi sp, sp, -32 +; RV32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw t0, 24(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw a2, 12(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw a3, 8(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw a4, 4(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw a5, 0(sp) # 4-byte Folded Spill +; RV32E-NEXT: call standard_cc_func +; RV32E-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw t0, 24(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw a1, 16(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw a2, 12(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw a4, 4(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw a5, 0(sp) # 4-byte Folded Reload +; RV32E-NEXT: addi sp, sp, 32 +; RV32E-NEXT: ret +; +; RV64E-LABEL: preserve_mostcc1: +; RV64E: # %bb.0: # %entry +; RV64E-NEXT: addi sp, sp, -64 +; RV64E-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd t0, 48(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd a3, 16(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd a4, 8(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd a5, 0(sp) # 8-byte Folded Spill +; RV64E-NEXT: call standard_cc_func +; RV64E-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld t0, 48(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld a3, 16(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld a4, 8(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld a5, 0(sp) # 8-byte Folded Reload +; RV64E-NEXT: addi sp, sp, 64 +; RV64E-NEXT: ret +entry: + call void @standard_cc_func() + ret void +} + +define preserve_mostcc void @preserve_mostcc2() nounwind { +; RV32I-LABEL: preserve_mostcc2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call preserve_mostcc_func +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: preserve_mostcc2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call preserve_mostcc_func +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32E-LABEL: preserve_mostcc2: +; RV32E: # %bb.0: +; RV32E-NEXT: addi sp, sp, -4 +; RV32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32E-NEXT: call preserve_mostcc_func +; RV32E-NEXT: lw ra, 
0(sp) # 4-byte Folded Reload +; RV32E-NEXT: addi sp, sp, 4 +; RV32E-NEXT: ret +; +; RV64E-LABEL: preserve_mostcc2: +; RV64E: # %bb.0: +; RV64E-NEXT: addi sp, sp, -8 +; RV64E-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; RV64E-NEXT: call preserve_mostcc_func +; RV64E-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; RV64E-NEXT: addi sp, sp, 8 +; RV64E-NEXT: ret + call preserve_mostcc void @preserve_mostcc_func() + ret void +} + +; X6, X7 and X28 will be saved to registers. +define void @preserve_mostcc3() nounwind { +; RV32I-LABEL: preserve_mostcc3: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv a0, t1 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv a1, t2 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv a2, t3 +; RV32I-NEXT: call preserve_mostcc_func +; RV32I-NEXT: mv t1, a0 +; RV32I-NEXT: mv t2, a1 +; RV32I-NEXT: mv t3, a2 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: preserve_mostcc3: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv a0, t1 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv a1, t2 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv a2, t3 +; RV64I-NEXT: call preserve_mostcc_func +; RV64I-NEXT: mv t1, a0 +; RV64I-NEXT: mv t2, a1 +; RV64I-NEXT: mv t3, a2 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV32E-LABEL: preserve_mostcc3: +; RV32E: # %bb.0: +; RV32E-NEXT: addi sp, sp, -12 +; RV32E-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: mv a0, t1 +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: mv a1, t2 +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: mv a2, t3 +; RV32E-NEXT: call preserve_mostcc_func +; RV32E-NEXT: mv t1, a0 +; RV32E-NEXT: mv t2, a1 +; RV32E-NEXT: mv t3, a2 +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; RV32E-NEXT: addi sp, sp, 12 +; RV32E-NEXT: ret +; +; RV64E-LABEL: preserve_mostcc3: +; RV64E: # %bb.0: +; RV64E-NEXT: addi sp, sp, -24 +; RV64E-NEXT: sd ra, 16(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd s1, 0(sp) # 8-byte Folded Spill +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: mv a0, t1 +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: mv 
a1, t2 +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: mv a2, t3 +; RV64E-NEXT: call preserve_mostcc_func +; RV64E-NEXT: mv t1, a0 +; RV64E-NEXT: mv t2, a1 +; RV64E-NEXT: mv t3, a2 +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: ld ra, 16(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld s1, 0(sp) # 8-byte Folded Reload +; RV64E-NEXT: addi sp, sp, 24 +; RV64E-NEXT: ret + %1 = call i32 asm sideeffect "", "={x6}"() nounwind + %2 = call i32 asm sideeffect "", "={x7}"() nounwind + %3 = call i32 asm sideeffect "", "={x8}"() nounwind + %4 = call i32 asm sideeffect "", "={x9}"() nounwind + %5 = call i32 asm sideeffect "", "={x28}"() nounwind + call preserve_mostcc void @preserve_mostcc_func() + call void asm sideeffect "", "{x6},{x7},{x8},{x9},{x28}"(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) + ret void +} + +; X6, X7 and X28 will be saved to the stack. +define void @preserve_mostcc4() nounwind { +; RV32I-LABEL: preserve_mostcc4: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv s2, t1 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv s3, t2 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv s4, t3 +; RV32I-NEXT: call standard_cc_func +; RV32I-NEXT: mv t1, s2 +; RV32I-NEXT: mv t2, s3 +; RV32I-NEXT: mv t3, s4 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: preserve_mostcc4: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv s2, t1 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv s3, t2 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv s4, t3 +; RV64I-NEXT: call standard_cc_func +; RV64I-NEXT: mv t1, s2 +; RV64I-NEXT: mv t2, s3 +; RV64I-NEXT: mv t3, s4 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV32E-LABEL: preserve_mostcc4: +; RV32E: # %bb.0: +; RV32E-NEXT: addi sp, sp, -24 +; RV32E-NEXT: sw ra, 
20(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32E-NEXT: sw s1, 12(sp) # 4-byte Folded Spill +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: sw t1, 8(sp) # 4-byte Folded Spill +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: sw t2, 4(sp) # 4-byte Folded Spill +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: sw t3, 0(sp) # 4-byte Folded Spill +; RV32E-NEXT: call standard_cc_func +; RV32E-NEXT: lw t1, 8(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw t2, 4(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw t3, 0(sp) # 4-byte Folded Reload +; RV32E-NEXT: #APP +; RV32E-NEXT: #NO_APP +; RV32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32E-NEXT: lw s1, 12(sp) # 4-byte Folded Reload +; RV32E-NEXT: addi sp, sp, 24 +; RV32E-NEXT: ret +; +; RV64E-LABEL: preserve_mostcc4: +; RV64E: # %bb.0: +; RV64E-NEXT: addi sp, sp, -48 +; RV64E-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64E-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: sd t1, 16(sp) # 8-byte Folded Spill +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: sd t2, 8(sp) # 8-byte Folded Spill +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: sd t3, 0(sp) # 8-byte Folded Spill +; RV64E-NEXT: call standard_cc_func +; RV64E-NEXT: ld t1, 16(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld t2, 8(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld t3, 0(sp) # 8-byte Folded Reload +; RV64E-NEXT: #APP +; RV64E-NEXT: #NO_APP +; RV64E-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64E-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64E-NEXT: addi sp, sp, 48 +; RV64E-NEXT: ret + %1 = call i32 asm sideeffect "", "={x6}"() nounwind + %2 = call i32 asm sideeffect "", "={x7}"() nounwind + %3 = call i32 asm sideeffect "", "={x8}"() nounwind + %4 = call i32 asm sideeffect "", "={x9}"() nounwind + %5 = call i32 asm sideeffect "", "={x28}"() nounwind + call void @standard_cc_func() + call void asm sideeffect "", "{x6},{x7},{x8},{x9},{x28}"(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/callsite-emit-calleetypeid-tailcall.ll b/llvm/test/CodeGen/RISCV/callsite-emit-calleetypeid-tailcall.ll new file mode 100644 index 0000000..6e1fe92 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/callsite-emit-calleetypeid-tailcall.ll @@ -0,0 +1,20 @@ +;; Tests that call site callee type ids can be extracted and set from +;; callee_type metadata for indirect tail calls. + +;; Verify the exact calleeTypeIds value to ensure it is not garbage but the value +;; computed as the type id from the callee_type operand bundle. 
+; RUN: llc --call-graph-section -mtriple riscv64 < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s +; RUN: llc --call-graph-section -mtriple riscv32 < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s + +define i32 @check_tailcall(ptr %func, i8 %x) !type !0 { +entry: + ; CHECK: callSites: + ; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds: + ; CHECK-NEXT: [ 3498816979441845844 ] } + %call = tail call i32 %func(i8 signext %x), !callee_type !1 + ret i32 %call +} + +!0 = !{i64 0, !"_ZTSFiPvcE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFicE.generalized"} diff --git a/llvm/test/CodeGen/RISCV/callsite-emit-calleetypeid.ll b/llvm/test/CodeGen/RISCV/callsite-emit-calleetypeid.ll new file mode 100644 index 0000000..1f91f41 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/callsite-emit-calleetypeid.ll @@ -0,0 +1,21 @@ +;; Tests that call site callee type ids can be extracted and set from +;; callee_type metadata. + +;; Verify the exact calleeTypeIds value to ensure it is not garbage but the value +;; computed as the type id from the callee_type operand bundle. +; RUN: llc --call-graph-section -mtriple riscv64 < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s +; RUN: llc --call-graph-section -mtriple riscv32 < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s + +; CHECK: name: main +; CHECK: callSites: +; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds: +; CHECK-NEXT: [ 7854600665770582568 ] } +define i32 @main() { +entry: + %fn = load ptr, ptr null, align 8 + call void %fn(i8 0), !callee_type !0 + ret i32 0 +} + +!0 = !{!1} +!1 = !{i64 0, !"_ZTSFvcE.generalized"} diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 246e6a6..117e3e4 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -3292,30 +3292,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 8(sp) -; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a2, 20(sp) +; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: lw a1, 8(sp) +; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: beqz a2, .LBB47_2 +; RV32IF-NEXT: beqz a0, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: slti a4, a2, 0 +; RV32IF-NEXT: slti a4, a0, 0 ; RV32IF-NEXT: j .LBB47_3 ; RV32IF-NEXT: .LBB47_2: ; RV32IF-NEXT: seqz a4, a3 ; RV32IF-NEXT: .LBB47_3: # %entry ; RV32IF-NEXT: xori a3, a3, 1 -; RV32IF-NEXT: or a3, a3, a2 +; RV32IF-NEXT: or a3, a3, a0 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a3, a3, a4 ; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: and a2, a3, a2 ; RV32IF-NEXT: and a1, a3, a1 ; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: and a2, a3, a2 -; RV32IF-NEXT: slti a2, a2, 0 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: slti a0, a0, 0 +; RV32IF-NEXT: addi a3, a0, -1 +; RV32IF-NEXT: and a0, a3, a1 +; RV32IF-NEXT: and a1, a3, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: .cfi_restore ra ; RV32IF-NEXT: addi sp, sp, 32 @@ -3354,30 +3354,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 8(sp) -; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a2, 20(sp) +; RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: lw 
a1, 8(sp) +; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: beqz a2, .LBB47_2 +; RV32IFD-NEXT: beqz a0, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: slti a4, a2, 0 +; RV32IFD-NEXT: slti a4, a0, 0 ; RV32IFD-NEXT: j .LBB47_3 ; RV32IFD-NEXT: .LBB47_2: ; RV32IFD-NEXT: seqz a4, a3 ; RV32IFD-NEXT: .LBB47_3: # %entry ; RV32IFD-NEXT: xori a3, a3, 1 -; RV32IFD-NEXT: or a3, a3, a2 +; RV32IFD-NEXT: or a3, a3, a0 ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a3, a3, a4 ; RV32IFD-NEXT: neg a3, a3 +; RV32IFD-NEXT: and a2, a3, a2 ; RV32IFD-NEXT: and a1, a3, a1 ; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: and a2, a3, a2 -; RV32IFD-NEXT: slti a2, a2, 0 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: and a0, a2, a0 -; RV32IFD-NEXT: and a1, a2, a1 +; RV32IFD-NEXT: slti a0, a0, 0 +; RV32IFD-NEXT: addi a3, a0, -1 +; RV32IFD-NEXT: and a0, a3, a1 +; RV32IFD-NEXT: and a1, a3, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: .cfi_restore ra ; RV32IFD-NEXT: addi sp, sp, 32 @@ -3530,30 +3530,30 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB50_2 +; RV32-NEXT: beqz a0, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a4, a0, 0 ; RV32-NEXT: j .LBB50_3 ; RV32-NEXT: .LBB50_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB50_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a2 +; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: addi a3, a0, -1 +; RV32-NEXT: and a0, a3, a1 +; RV32-NEXT: and a1, a3, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -3767,30 +3767,30 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a0, 20(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a2, .LBB53_2 +; RV32-NEXT: beqz a0, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a4, a0, 0 ; RV32-NEXT: j .LBB53_3 ; RV32-NEXT: .LBB53_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB53_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a2 +; RV32-NEXT: or a3, a3, a0 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: slti a2, a2, 0 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: addi a3, a0, -1 +; RV32-NEXT: and a0, a3, a1 +; RV32-NEXT: and a1, a3, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi 
sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll index e278b8d..472b903 100644 --- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll @@ -794,498 +794,46 @@ define void @foo_with_call() #1 { ; CHECK-RV32-V-NEXT: slli a0, a0, 5 ; CHECK-RV32-V-NEXT: sub sp, sp, a0 ; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 5 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 3 ; CHECK-RV32-V-NEXT: mv a1, a0 ; CHECK-RV32-V-NEXT: slli a0, a0, 1 ; CHECK-RV32-V-NEXT: add a0, a0, a1 ; CHECK-RV32-V-NEXT: add a0, sp, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; 
CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 4 ; CHECK-RV32-V-NEXT: add a0, sp, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; 
CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 3 ; CHECK-RV32-V-NEXT: add a0, sp, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: addi a0, sp, 16 -; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; 
CHECK-RV32-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: call otherfoo ; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 5 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 ; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 
-; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: mv a1, a0 ; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 ; CHECK-RV32-V-NEXT: add a0, a0, a1 ; CHECK-RV32-V-NEXT: add a0, sp, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 4 ; CHECK-RV32-V-NEXT: add a0, sp, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; 
CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 3 ; CHECK-RV32-V-NEXT: add a0, sp, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: add a0, sp, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, 16 -; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: addi a0, sp, 16 -; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 5 ; CHECK-RV32-V-NEXT: add sp, sp, a0 @@ -1351,498 +899,46 @@ define void @foo_with_call() #1 { ; 
CHECK-RV32-FV-NEXT: slli a0, a0, 5 ; CHECK-RV32-FV-NEXT: sub sp, sp, a0 ; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 5 -; CHECK-RV32-FV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FV-NEXT: add a0, sp, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: add a0, sp, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: add a0, sp, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: add a0, sp, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: add a0, sp, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: add a0, sp, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: add a0, sp, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 3 ; CHECK-RV32-FV-NEXT: mv a1, a0 ; CHECK-RV32-FV-NEXT: slli a0, a0, 1 ; CHECK-RV32-FV-NEXT: add a0, a0, a1 ; CHECK-RV32-FV-NEXT: add a0, sp, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, 16 -; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: add a0, sp, 
a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: call otherfoo
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: mv a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: add sp, sp, a0
@@ -1928,498 +1024,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub sp, sp, a0
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: call otherfoo
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: add sp, sp, a0
@@ -3259,498 +1903,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub sp, sp, a0
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: addi a0, sp, 16
-; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: call otherfoo
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: addi a0, sp, 16
-; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: add sp, sp, a0
@@ -3816,498 +2008,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub sp, sp, a0
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: mv a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: call otherfoo
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
;
CHECK-RV64-FV-NEXT: slli a0, a0, 3 ; CHECK-RV64-FV-NEXT: mv a1, a0 ; CHECK-RV64-FV-NEXT: slli a0, a0, 1 ; CHECK-RV64-FV-NEXT: add a0, a0, a1 ; CHECK-RV64-FV-NEXT: add a0, sp, a0 ; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a1, a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a1, a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a1, a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a1, a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a1, a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 3 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 3 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a1, a0, 4 -; CHECK-RV64-FV-NEXT: add a0, a1, a0 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FV-NEXT: csrr a0, vlenb ; CHECK-RV64-FV-NEXT: slli a0, a0, 4 ; CHECK-RV64-FV-NEXT: add a0, sp, a0 ; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a1, a0, 4 -; CHECK-RV64-FV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a1, 
a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a1, a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a1, a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a1, a0, 3 -; CHECK-RV64-FV-NEXT: add a0, a1, a0 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FV-NEXT: csrr a0, vlenb ; CHECK-RV64-FV-NEXT: slli a0, a0, 3 ; CHECK-RV64-FV-NEXT: add a0, sp, a0 ; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a1, a0, 3 -; CHECK-RV64-FV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: mv a1, a0 -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a0, a0, a1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a1, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, a1, a0 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a1, a0, 1 -; CHECK-RV64-FV-NEXT: add a0, a1, a0 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload -; 
CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FV-NEXT: csrr a0, vlenb -; CHECK-RV64-FV-NEXT: add a0, sp, a0 -; CHECK-RV64-FV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FV-NEXT: addi a0, sp, 16 -; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FV-NEXT: csrr a0, vlenb ; CHECK-RV64-FV-NEXT: slli a0, a0, 5 ; CHECK-RV64-FV-NEXT: add sp, sp, a0 @@ -4393,498 +2133,46 @@ define void @foo_with_call() #1 { ; CHECK-RV64-FDV-NEXT: slli a0, a0, 5 ; CHECK-RV64-FDV-NEXT: sub sp, sp, a0 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 5 -; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 -; 
CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb ; CHECK-RV64-FDV-NEXT: mv a1, a0 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV64-FDV-NEXT: add a0, a0, a1 ; CHECK-RV64-FDV-NEXT: add a0, sp, a0 ; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb ; CHECK-RV64-FDV-NEXT: slli a0, a0, 4 ; CHECK-RV64-FDV-NEXT: add a0, sp, a0 ; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, 
a0, 4 -; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV64-FDV-NEXT: add a0, sp, a0 ; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; 
CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-FDV-NEXT: addi a0, sp, 16 -; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-FDV-NEXT: call otherfoo ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 5 -; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; 
CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV64-FDV-NEXT: mv a1, a0 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 ; CHECK-RV64-FDV-NEXT: add a0, a0, a1 ; CHECK-RV64-FDV-NEXT: add a0, sp, a0 ; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FDV-NEXT: 
csrr a0, vlenb ; CHECK-RV64-FDV-NEXT: slli a0, a0, 4 ; CHECK-RV64-FDV-NEXT: add a0, sp, a0 ; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 -; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a1, a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV64-FDV-NEXT: add a0, sp, a0 ; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: mv a1, a0 -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a0, a1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, 
vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a1, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, a1, a0 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV64-FDV-NEXT: csrr a0, vlenb -; CHECK-RV64-FDV-NEXT: add a0, sp, a0 -; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FDV-NEXT: addi a0, sp, 16 -; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb ; CHECK-RV64-FDV-NEXT: slli a0, a0, 5 ; CHECK-RV64-FDV-NEXT: add sp, sp, a0 @@ -5670,422 +2958,39 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32-V-NEXT: slli a0, a0, 5 ; CHECK-RV32-V-NEXT: sub sp, sp, a0 ; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 3 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v 
v7, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 4 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 ; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, 
a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 ; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 ; CHECK-RV32-V-NEXT: slli a0, a0, 1 ; CHECK-RV32-V-NEXT: add a0, a0, a1 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr 
a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 5 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 5 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-V-NEXT: call otherfoo ; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 3 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 3 -; 
CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 4 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 4 -; CHECK-RV32-V-NEXT: add a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: 
mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 3 ; CHECK-RV32-V-NEXT: mv a1, a0 @@ -6093,81 +2998,12 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32-V-NEXT: add a0, a0, a1 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 3 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 2 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, 
a0, 2 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: mv a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a1, a1, a0 -; CHECK-RV32-V-NEXT: slli a0, a0, 1 -; CHECK-RV32-V-NEXT: add a0, a0, a1 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-V-NEXT: csrr a0, vlenb -; CHECK-RV32-V-NEXT: slli a1, a0, 5 -; CHECK-RV32-V-NEXT: sub a0, a1, a0 -; CHECK-RV32-V-NEXT: sub a0, s0, a0 -; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: csrr a0, vlenb ; CHECK-RV32-V-NEXT: slli a0, a0, 5 ; CHECK-RV32-V-NEXT: sub a0, s0, a0 ; CHECK-RV32-V-NEXT: addi a0, a0, -80 -; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-V-NEXT: addi sp, s0, -80 ; CHECK-RV32-V-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; CHECK-RV32-V-NEXT: lw t0, 72(sp) # 4-byte Folded Reload @@ -6234,172 +3070,15 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32-FV-NEXT: slli a0, a0, 5 ; CHECK-RV32-FV-NEXT: sub sp, sp, a0 ; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 
3 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 4 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; 
CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 3 ; CHECK-RV32-FV-NEXT: mv a1, a0 @@ -6407,331 +3086,36 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32-FV-NEXT: add a0, a0, a1 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: mv a1, 
a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 5 -; CHECK-RV32-FV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 5 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FV-NEXT: call otherfoo ; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, 
s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 3 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 4 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FV-NEXT: add a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 ; CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 ; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 ; CHECK-RV32-FV-NEXT: slli a0, a0, 1 ; CHECK-RV32-FV-NEXT: add a0, a0, a1 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 
-; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: mv a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a1, a1, a0 -; CHECK-RV32-FV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FV-NEXT: add a0, a0, a1 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FV-NEXT: csrr a0, vlenb -; CHECK-RV32-FV-NEXT: slli a1, a0, 5 -; CHECK-RV32-FV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FV-NEXT: csrr a0, vlenb ; CHECK-RV32-FV-NEXT: slli a0, a0, 5 ; CHECK-RV32-FV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FV-NEXT: addi a0, a0, -160 -; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FV-NEXT: addi sp, s0, -160 ; CHECK-RV32-FV-NEXT: lw ra, 156(sp) # 4-byte Folded Reload ; CHECK-RV32-FV-NEXT: lw t0, 152(sp) # 4-byte Folded Reload @@ -6818,172 +3202,15 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 ; CHECK-RV32-FDV-NEXT: sub sp, sp, a0 ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill -; 
CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 4 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill -; 
CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV32-FDV-NEXT: mv a1, a0 @@ -6991,249 +3218,23 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32-FDV-NEXT: add a0, a0, a1 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli 
a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 5 -; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-FDV-NEXT: call otherfoo ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: 
slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 -; 
CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 4 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 -; CHECK-RV32-FDV-NEXT: add a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 ; CHECK-RV32-FDV-NEXT: mv a1, a0 @@ -7241,81 +3242,12 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32-FDV-NEXT: add a0, a0, a1 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; 
CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: mv a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a1, a1, a0 -; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 -; CHECK-RV32-FDV-NEXT: add a0, a0, a1 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-FDV-NEXT: csrr a0, vlenb -; CHECK-RV32-FDV-NEXT: slli a1, a0, 5 -; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 -; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 -; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FDV-NEXT: csrr a0, vlenb ; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 ; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 ; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 -; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-FDV-NEXT: addi sp, s0, -240 ; CHECK-RV32-FDV-NEXT: lw ra, 236(sp) # 4-byte Folded Reload ; CHECK-RV32-FDV-NEXT: lw t0, 232(sp) # 4-byte Folded Reload @@ -8186,422 +4118,39 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV64-V-NEXT: slli a0, a0, 5 ; CHECK-RV64-V-NEXT: sub sp, sp, a0 ; 
CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a1, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a1, a0 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a1, a0, 2 -; CHECK-RV64-V-NEXT: add a0, a1, a0 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a1, a0, 3 -; CHECK-RV64-V-NEXT: sub a0, a1, a0 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb ; CHECK-RV64-V-NEXT: slli a0, a0, 3 ; CHECK-RV64-V-NEXT: sub a0, s0, a0 ; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a1, a0, 3 -; CHECK-RV64-V-NEXT: add a0, a1, a0 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v12, (a0) # vscale x 
8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a1, a0, 4 -; CHECK-RV64-V-NEXT: sub a0, a1, a0 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-V-NEXT: csrr a0, vlenb ; CHECK-RV64-V-NEXT: slli a0, a0, 4 ; CHECK-RV64-V-NEXT: sub a0, s0, a0 ; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a1, a0, 4 -; CHECK-RV64-V-NEXT: add a0, a1, a0 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: mv a1, a0 ; CHECK-RV64-V-NEXT: slli a0, a0, 3 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 3 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; 
CHECK-RV64-V-NEXT: slli a0, a0, 3 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 3 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 ; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a1, a1, a0 ; CHECK-RV64-V-NEXT: slli a0, a0, 1 ; CHECK-RV64-V-NEXT: add a0, a0, a1 ; CHECK-RV64-V-NEXT: sub a0, s0, a0 ; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 2 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: mv a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a1, a1, a0 -; CHECK-RV64-V-NEXT: slli a0, a0, 1 -; CHECK-RV64-V-NEXT: add a0, a0, a1 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: slli a1, a0, 5 -; CHECK-RV64-V-NEXT: sub a0, a1, a0 -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-V-NEXT: csrr a0, vlenb ; CHECK-RV64-V-NEXT: slli a0, a0, 5 ; CHECK-RV64-V-NEXT: sub a0, s0, a0 ; CHECK-RV64-V-NEXT: addi a0, a0, -160 -; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV64-V-NEXT: call otherfoo ; CHECK-RV64-V-NEXT: csrr a0, vlenb -; CHECK-RV64-V-NEXT: sub a0, s0, a0 -; CHECK-RV64-V-NEXT: addi a0, 
a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
 ; CHECK-RV64-V-NEXT: slli a0, a0, 3
 ; CHECK-RV64-V-NEXT: sub a0, s0, a0
 ; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-V-NEXT: csrr a0, vlenb
 ; CHECK-RV64-V-NEXT: slli a0, a0, 4
 ; CHECK-RV64-V-NEXT: sub a0, s0, a0
 ; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-V-NEXT: csrr a0, vlenb
 ; CHECK-RV64-V-NEXT: slli a0, a0, 3
 ; CHECK-RV64-V-NEXT: mv a1, a0
@@ -8609,81 +4158,12 @@ define void @foo_fp_with_call() #2 {
 ; CHECK-RV64-V-NEXT: add a0, a0, a1
 ; CHECK-RV64-V-NEXT: sub a0, s0, a0
 ; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-V-NEXT: csrr a0, vlenb
 ; CHECK-RV64-V-NEXT: slli a0, a0, 5
 ; CHECK-RV64-V-NEXT: sub a0, s0, a0
 ; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-V-NEXT: addi sp, s0, -160
 ; CHECK-RV64-V-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
 ; CHECK-RV64-V-NEXT: ld t0, 144(sp) # 8-byte Folded Reload
@@ -8750,172 +4230,15 @@ define void @foo_fp_with_call() #2 {
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 5
 ; CHECK-RV64-FV-NEXT: sub sp, sp, a0
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 3
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 4
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 3
 ; CHECK-RV64-FV-NEXT: mv a1, a0
@@ -8923,331 +4246,36 @@ define void @foo_fp_with_call() #2 {
 ; CHECK-RV64-FV-NEXT: add a0, a0, a1
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 5
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FV-NEXT: call otherfoo
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 3
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 4
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
 ; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 1
 ; CHECK-RV64-FV-NEXT: add a0, a0, a1
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FV-NEXT: slli a0, a0, 5
 ; CHECK-RV64-FV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FV-NEXT: addi sp, s0, -240
 ; CHECK-RV64-FV-NEXT: ld ra, 232(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FV-NEXT: ld t0, 224(sp) # 8-byte Folded Reload
@@ -9334,172 +4362,15 @@ define void @foo_fp_with_call() #2 {
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
 ; CHECK-RV64-FDV-NEXT: sub sp, sp, a0
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
 ; CHECK-RV64-FDV-NEXT: mv a1, a0
@@ -9507,249 +4378,23 @@ define void @foo_fp_with_call() #2 {
 ; CHECK-RV64-FDV-NEXT: add a0, a0, a1
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-RV64-FDV-NEXT: call otherfoo
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
 ; CHECK-RV64-FDV-NEXT: mv a1, a0
@@ -9757,81 +4402,12 @@ define void @foo_fp_with_call() #2 {
 ; CHECK-RV64-FDV-NEXT: add a0, a0, a1
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
 ; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
 ; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
 ; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-RV64-FDV-NEXT: addi sp, s0, -320
 ; CHECK-RV64-FDV-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FDV-NEXT: ld t0, 304(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index 87c8343..a06c750 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -7,18 +7,18 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
 ; RV32-LABEL: ctz_nxv4i32:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vmv.v.i v11, -1
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: li a1, -1
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV32-NEXT: vmsne.vi v0, v8, 0
 ; RV32-NEXT: srli a0, a0, 1
 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vmacc.vv v8, v10, v11
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
+; RV32-NEXT: vmadd.vx v10, a1, v8
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
 ; RV32-NEXT: vredmaxu.vs v8, v8, v8
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: sub a0, a0, a1
@@ -28,18 +28,18 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
 ;
 ; RV64-LABEL: ctz_nxv4i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vmv.v.i v11, -1
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: li a1, -1
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT: vmsne.vi v0, v8, 0
 ; RV64-NEXT: srli a0, a0, 1
 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmacc.vv v8, v10, v11
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
+; RV64-NEXT: vmadd.vx v10, a1, v8
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vvm v8, v8, v10, v0
 ; RV64-NEXT: vredmaxu.vs v8, v8, v8
 ; RV64-NEXT: vmv.x.s a1, v8
 ; RV64-NEXT: sub a0, a0, a1
@@ -109,17 +109,17 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
 ;
 ; RV64-LABEL: ctz_nxv8i1_no_range:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vmv.v.i v24, -1
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: li a1, -1
 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT: vmsne.vi v0, v8, 0
 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmacc.vv v8, v16, v24
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: vmerge.vvm v8, v16, v8, v0
+; RV64-NEXT: vmadd.vx v16, a1, v8
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
 ; RV64-NEXT: vredmaxu.vs v8, v8, v8
 ; RV64-NEXT: vmv.x.s a1, v8
 ; RV64-NEXT: sub a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/memset-inline.ll b/llvm/test/CodeGen/RISCV/memset-inline.ll
index 1263892..4091524 100644
--- a/llvm/test/CodeGen/RISCV/memset-inline.ll
+++ b/llvm/test/CodeGen/RISCV/memset-inline.ll
@@ -684,13 +684,13 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
 ; /////////////////////////////////////////////////////////////////////////////
-define void @bzero_1(ptr %a) nounwind {
-; RV32-BOTH-LABEL: bzero_1:
+define void @memset_zero_1(ptr %a) nounwind {
+; RV32-BOTH-LABEL: memset_zero_1:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sb zero, 0(a0)
 ; RV32-BOTH-NEXT: ret
 ;
-; RV64-BOTH-LABEL: bzero_1:
+; RV64-BOTH-LABEL: memset_zero_1:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sb zero, 0(a0)
 ; RV64-BOTH-NEXT: ret
@@ -698,25 +698,25 @@ define void @bzero_1(ptr %a) nounwind {
   ret void
 }
-define void @bzero_2(ptr %a) nounwind {
-; RV32-LABEL: bzero_2:
+define void @memset_zero_2(ptr %a) nounwind {
+; RV32-LABEL: memset_zero_2:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sb zero, 0(a0)
 ; RV32-NEXT: sb zero, 1(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: bzero_2:
+; RV64-LABEL: memset_zero_2:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sb zero, 0(a0)
 ; RV64-NEXT: sb zero, 1(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: bzero_2:
+; RV32-FAST-LABEL: memset_zero_2:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sh zero, 0(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: bzero_2:
+; RV64-FAST-LABEL: memset_zero_2:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sh zero, 0(a0)
 ; RV64-FAST-NEXT: ret
@@ -724,8 +724,8 @@ define void @bzero_2(ptr %a) nounwind {
   ret void
 }
-define void @bzero_4(ptr %a) nounwind {
-; RV32-LABEL: bzero_4:
+define void @memset_zero_4(ptr %a) nounwind {
+; RV32-LABEL: memset_zero_4:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sb zero, 0(a0)
 ; RV32-NEXT: sb zero, 1(a0)
@@ -733,7 +733,7 @@ define void @bzero_4(ptr %a) nounwind {
 ; RV32-NEXT: sb zero, 3(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: bzero_4:
+; RV64-LABEL: memset_zero_4:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sb zero, 0(a0)
 ; RV64-NEXT: sb zero, 1(a0)
@@ -741,12 +741,12 @@ define void @bzero_4(ptr %a) nounwind {
 ; RV64-NEXT: sb zero, 3(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: bzero_4:
+; RV32-FAST-LABEL: memset_zero_4:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sw zero, 0(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: bzero_4:
+; RV64-FAST-LABEL: memset_zero_4:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sw zero, 0(a0)
 ; RV64-FAST-NEXT: ret
@@ -754,8 +754,8 @@ define void @bzero_4(ptr %a) nounwind {
   ret void
 }
-define void @bzero_8(ptr %a) nounwind {
-; RV32-LABEL: bzero_8:
+define void @memset_zero_8(ptr %a) nounwind {
+; RV32-LABEL: memset_zero_8:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sb zero, 4(a0)
 ; RV32-NEXT: sb zero, 5(a0)
@@ -767,7 +767,7 @@ define void @bzero_8(ptr %a) nounwind {
 ; RV32-NEXT: sb zero, 3(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: bzero_8:
+; RV64-LABEL: memset_zero_8:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sb zero, 4(a0)
 ; RV64-NEXT: sb zero, 5(a0)
@@ -779,13 +779,13 @@ define void @bzero_8(ptr %a) nounwind {
 ; RV64-NEXT: sb zero, 3(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: bzero_8:
+; RV32-FAST-LABEL: memset_zero_8:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sw zero, 0(a0)
 ; RV32-FAST-NEXT: sw zero, 4(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: bzero_8:
+; RV64-FAST-LABEL: memset_zero_8:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sd zero, 0(a0)
 ; RV64-FAST-NEXT: ret
@@ -793,8 +793,8 @@ define void @bzero_8(ptr %a) nounwind {
   ret void
 }
-define void @bzero_16(ptr %a) nounwind {
-; RV32-LABEL: bzero_16:
+define void @memset_zero_16(ptr %a) nounwind {
+; RV32-LABEL: memset_zero_16:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sb zero, 12(a0)
 ; RV32-NEXT: sb zero, 13(a0)
@@ -814,7 +814,7 @@ define void @bzero_16(ptr %a) nounwind {
 ; RV32-NEXT: sb zero, 3(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: bzero_16:
+; RV64-LABEL: memset_zero_16:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sb zero, 12(a0)
 ; RV64-NEXT: sb zero, 13(a0)
@@ -834,7 +834,7 @@ define void @bzero_16(ptr %a) nounwind {
 ; RV64-NEXT: sb zero, 3(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: bzero_16:
+; RV32-FAST-LABEL: memset_zero_16:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sw zero, 0(a0)
 ; RV32-FAST-NEXT: sw zero, 4(a0)
@@ -842,7 +842,7 @@ define void @bzero_16(ptr %a) nounwind {
 ; RV32-FAST-NEXT: sw zero, 12(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: bzero_16:
+; RV64-FAST-LABEL: memset_zero_16:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sd zero, 0(a0)
 ; RV64-FAST-NEXT: sd zero, 8(a0)
@@ -851,8 +851,8 @@ define void @bzero_16(ptr %a) nounwind {
   ret void
 }
-define void @bzero_32(ptr %a) nounwind {
-; RV32-LABEL: bzero_32:
+define void @memset_zero_32(ptr %a) nounwind {
+; RV32-LABEL: memset_zero_32:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sb zero, 28(a0)
 ; RV32-NEXT: sb zero, 29(a0)
@@ -888,7 +888,7 @@ define void @bzero_32(ptr %a) nounwind {
 ; RV32-NEXT: sb zero, 3(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: bzero_32:
+; RV64-LABEL: memset_zero_32:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sb zero, 28(a0)
 ; RV64-NEXT: sb zero, 29(a0)
@@ -924,7 +924,7 @@ define void @bzero_32(ptr %a) nounwind {
 ; RV64-NEXT: sb zero, 3(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: bzero_32:
+; RV32-FAST-LABEL: memset_zero_32:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sw zero, 16(a0)
 ; RV32-FAST-NEXT: sw zero, 20(a0)
@@ -936,7 +936,7 @@ define void @bzero_32(ptr %a) nounwind {
 ; RV32-FAST-NEXT: sw zero, 12(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: bzero_32:
+; RV64-FAST-LABEL: memset_zero_32:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sd zero, 0(a0)
 ; RV64-FAST-NEXT: sd zero, 8(a0)
@@ -947,8 +947,8 @@ define void @bzero_32(ptr %a) nounwind {
   ret void
 }
-define void @bzero_64(ptr %a) nounwind {
-; RV32-LABEL: bzero_64:
+define void @memset_zero_64(ptr %a) nounwind {
+; RV32-LABEL: memset_zero_64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sb zero, 60(a0)
 ; RV32-NEXT: sb zero, 61(a0)
@@ -1016,7 +1016,7 @@ define void @bzero_64(ptr %a) nounwind {
 ; RV32-NEXT: sb zero, 3(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: bzero_64:
+; RV64-LABEL: memset_zero_64:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sb zero, 60(a0)
 ; RV64-NEXT: sb zero, 61(a0)
@@ -1084,7 +1084,7 @@ define void @bzero_64(ptr %a) nounwind {
 ; RV64-NEXT: sb zero, 3(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: bzero_64:
+; RV32-FAST-LABEL: memset_zero_64:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sw zero, 48(a0)
 ; RV32-FAST-NEXT: sw zero, 52(a0)
@@ -1104,7 +1104,7 @@ define void @bzero_64(ptr %a) nounwind {
 ; RV32-FAST-NEXT: sw zero, 12(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: bzero_64:
+; RV64-FAST-LABEL: memset_zero_64:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sd zero, 32(a0)
 ; RV64-FAST-NEXT: sd zero, 40(a0)
@@ -1121,13 +1121,13 @@ define void @bzero_64(ptr %a) nounwind {
 ;
 ; /////////////////////////////////////////////////////////////////////////////
-define void @aligned_bzero_2(ptr %a) nounwind {
-; RV32-BOTH-LABEL: aligned_bzero_2:
+define void @aligned_memset_zero_2(ptr %a) nounwind {
+; RV32-BOTH-LABEL: aligned_memset_zero_2:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sh zero, 0(a0)
 ; RV32-BOTH-NEXT: ret
 ;
-; RV64-BOTH-LABEL: aligned_bzero_2:
+; RV64-BOTH-LABEL: aligned_memset_zero_2:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sh zero, 0(a0)
 ; RV64-BOTH-NEXT: ret
@@ -1135,13 +1135,13 @@ define void @aligned_bzero_2(ptr %a) nounwind {
   ret void
 }
-define void @aligned_bzero_4(ptr %a) nounwind {
-; RV32-BOTH-LABEL: aligned_bzero_4:
+define void @aligned_memset_zero_4(ptr %a) nounwind {
+; RV32-BOTH-LABEL: aligned_memset_zero_4:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sw zero, 0(a0)
 ; RV32-BOTH-NEXT: ret
 ;
-; RV64-BOTH-LABEL: aligned_bzero_4:
+; RV64-BOTH-LABEL: aligned_memset_zero_4:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sw zero, 0(a0)
 ; RV64-BOTH-NEXT: ret
@@ -1149,14 +1149,14 @@ define void @aligned_bzero_4(ptr %a) nounwind {
   ret void
 }
-define void @aligned_bzero_8(ptr %a) nounwind {
-; RV32-BOTH-LABEL: aligned_bzero_8:
+define void @aligned_memset_zero_8(ptr %a) nounwind {
+; RV32-BOTH-LABEL: aligned_memset_zero_8:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sw zero, 0(a0)
 ; RV32-BOTH-NEXT: sw zero, 4(a0)
 ; RV32-BOTH-NEXT: ret
 ;
-; RV64-BOTH-LABEL: aligned_bzero_8:
+; RV64-BOTH-LABEL: aligned_memset_zero_8:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sd zero, 0(a0)
 ; RV64-BOTH-NEXT: ret
@@ -1165,8 +1165,8 @@ define void @aligned_bzero_8(ptr %a) nounwind {
 }
-define void @aligned_bzero_16(ptr %a) nounwind {
-; RV32-BOTH-LABEL: aligned_bzero_16:
+define void @aligned_memset_zero_16(ptr %a) nounwind {
+; RV32-BOTH-LABEL: aligned_memset_zero_16:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sw zero, 0(a0)
 ; RV32-BOTH-NEXT: sw zero, 4(a0)
@@ -1174,7 +1174,7 @@ define void @aligned_bzero_16(ptr %a) nounwind {
 ; RV32-BOTH-NEXT: sw zero, 12(a0)
 ; RV32-BOTH-NEXT: ret
 ;
-; RV64-BOTH-LABEL: aligned_bzero_16:
+; RV64-BOTH-LABEL: aligned_memset_zero_16:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sd zero, 0(a0)
 ; RV64-BOTH-NEXT: sd zero, 8(a0)
@@ -1183,8 +1183,8 @@ define void @aligned_bzero_16(ptr %a) nounwind {
   ret void
 }
-define void @aligned_bzero_32(ptr %a) nounwind {
-; RV32-BOTH-LABEL: aligned_bzero_32:
+define void @aligned_memset_zero_32(ptr %a) nounwind {
+; RV32-BOTH-LABEL: aligned_memset_zero_32:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sw zero, 16(a0)
 ; RV32-BOTH-NEXT: sw zero, 20(a0)
@@ -1196,7 +1196,7 @@ define void @aligned_bzero_32(ptr %a) nounwind {
 ; RV32-BOTH-NEXT: sw zero, 12(a0)
 ; RV32-BOTH-NEXT: ret
 ;
-; RV64-BOTH-LABEL: aligned_bzero_32:
+; RV64-BOTH-LABEL: aligned_memset_zero_32:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sd zero, 0(a0)
 ; RV64-BOTH-NEXT: sd zero, 8(a0)
@@ -1207,8 +1207,8 @@ define void @aligned_bzero_32(ptr %a) nounwind {
   ret void
 }
-define void @aligned_bzero_64(ptr %a) nounwind {
-; RV32-BOTH-LABEL: aligned_bzero_64:
+define void @aligned_memset_zero_64(ptr %a) nounwind {
+; RV32-BOTH-LABEL: aligned_memset_zero_64:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sw zero, 48(a0)
 ; RV32-BOTH-NEXT: sw zero, 52(a0)
@@ -1228,7 +1228,7 @@ define void @aligned_bzero_64(ptr %a) nounwind {
 ; RV32-BOTH-NEXT: sw zero, 12(a0)
 ; RV32-BOTH-NEXT: ret
 ;
-; RV64-BOTH-LABEL: aligned_bzero_64:
+; RV64-BOTH-LABEL: aligned_memset_zero_64:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sd zero, 32(a0)
 ; RV64-BOTH-NEXT: sd zero, 40(a0)
@@ -1247,28 +1247,28 @@ define void @aligned_bzero_64(ptr %a) nounwind {
 ; /////////////////////////////////////////////////////////////////////////////
 ; Usual overlap tricks
-define void @aligned_bzero_7(ptr %a) nounwind {
-; RV32-LABEL: aligned_bzero_7:
+define void @aligned_memset_zero_7(ptr %a) nounwind {
+; RV32-LABEL: aligned_memset_zero_7:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sw zero, 0(a0)
 ; RV32-NEXT: sh zero, 4(a0)
 ; RV32-NEXT: sb zero, 6(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: aligned_bzero_7:
+; RV64-LABEL: aligned_memset_zero_7:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sw zero, 0(a0)
 ; RV64-NEXT: sh zero, 4(a0)
 ; RV64-NEXT: sb zero, 6(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: aligned_bzero_7:
+; RV32-FAST-LABEL: aligned_memset_zero_7:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sw zero, 3(a0)
 ; RV32-FAST-NEXT: sw zero, 0(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: aligned_bzero_7:
+; RV64-FAST-LABEL: aligned_memset_zero_7:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sw zero, 3(a0)
 ; RV64-FAST-NEXT: sw zero, 0(a0)
@@ -1277,8 +1277,8 @@ define void @aligned_bzero_7(ptr %a) nounwind {
   ret void
 }
-define void @aligned_bzero_15(ptr %a) nounwind {
-; RV32-LABEL: aligned_bzero_15:
+define void @aligned_memset_zero_15(ptr %a) nounwind {
+; RV32-LABEL: aligned_memset_zero_15:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sb zero, 14(a0)
 ; RV32-NEXT: sw zero, 0(a0)
@@ -1287,7 +1287,7 @@ define void @aligned_bzero_15(ptr %a) nounwind {
 ; RV32-NEXT: sh zero, 12(a0)
 ; RV32-NEXT: ret
 ;
-; RV64-LABEL: aligned_bzero_15:
+; RV64-LABEL: aligned_memset_zero_15:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sd zero, 0(a0)
 ; RV64-NEXT: sw zero, 8(a0)
@@ -1295,7 +1295,7 @@ define void @aligned_bzero_15(ptr %a) nounwind {
 ; RV64-NEXT: sb zero, 14(a0)
 ; RV64-NEXT: ret
 ;
-; RV32-FAST-LABEL: aligned_bzero_15:
+; RV32-FAST-LABEL: aligned_memset_zero_15:
 ; RV32-FAST: # %bb.0:
 ; RV32-FAST-NEXT: sw zero, 11(a0)
 ; RV32-FAST-NEXT: sw zero, 0(a0)
@@ -1303,7 +1303,7 @@ define void @aligned_bzero_15(ptr %a) nounwind {
 ; RV32-FAST-NEXT: sw zero, 8(a0)
 ; RV32-FAST-NEXT: ret
 ;
-; RV64-FAST-LABEL: aligned_bzero_15:
+; RV64-FAST-LABEL: aligned_memset_zero_15:
 ; RV64-FAST: # %bb.0:
 ; RV64-FAST-NEXT: sd zero, 7(a0)
 ; RV64-FAST-NEXT: sd zero, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/pr148084.ll b/llvm/test/CodeGen/RISCV/pr148084.ll
new file mode 100644
index 0000000..9fa26c7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr148084.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+source_filename = "external/libaom/av1/encoder/tx_search.c"
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-android10000"
+
+define fastcc void @search_tx_type() #0 {
+; CHECK-LABEL: search_tx_type:
+; CHECK: # %bb.0: # %._crit_edge.i
+; CHECK-NEXT: # %bb.1: # %bb
+; CHECK-NEXT: lbu a1, 0(zero)
+; CHECK-NEXT: lw a0, 0(zero)
+; CHECK-NEXT: lh a2, 0(zero)
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: srai a3, a0, 63
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1,
a1, a2 +; CHECK-NEXT: andi a2, a1, 1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: or a3, a3, a0 +; CHECK-NEXT: or a2, a2, a3 +; CHECK-NEXT: bgez a2, .LBB0_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: bexti a3, a1, 1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: .LBB0_3: # %bb +; CHECK-NEXT: andi a4, a1, 4 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: beqz a4, .LBB0_5 +; CHECK-NEXT: # %bb.4: # %bb +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_5: # %bb +; CHECK-NEXT: blt a2, a0, .LBB0_7 +; CHECK-NEXT: # %bb.6: # %bb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB0_7: # %bb +; CHECK-NEXT: andi a5, a1, 8 +; CHECK-NEXT: sext.w a4, a3 +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: beqz a5, .LBB0_9 +; CHECK-NEXT: # %bb.8: # %bb +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: .LBB0_9: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_11 +; CHECK-NEXT: # %bb.10: # %bb +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB0_11: # %bb +; CHECK-NEXT: andi a5, a1, 16 +; CHECK-NEXT: sext.w a4, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: beqz a5, .LBB0_13 +; CHECK-NEXT: # %bb.12: # %bb +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_13: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_15 +; CHECK-NEXT: # %bb.14: # %bb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB0_15: # %bb +; CHECK-NEXT: andi a5, a1, 32 +; CHECK-NEXT: sext.w a4, a3 +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: beqz a5, .LBB0_17 +; CHECK-NEXT: # %bb.16: # %bb +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: .LBB0_17: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_19 +; CHECK-NEXT: # %bb.18: # %bb +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB0_19: # %bb +; CHECK-NEXT: andi a5, a1, 64 +; CHECK-NEXT: sext.w a4, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: beqz a5, .LBB0_21 +; CHECK-NEXT: # %bb.20: # %bb +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_21: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_23 +; CHECK-NEXT: # %bb.22: # %bb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB0_23: # %bb +; CHECK-NEXT: andi a5, a1, 128 +; CHECK-NEXT: sext.w a4, a3 +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: beqz a5, .LBB0_25 +; CHECK-NEXT: # %bb.24: # %bb +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: .LBB0_25: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_27 +; CHECK-NEXT: # %bb.26: # %bb +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB0_27: # %bb +; CHECK-NEXT: andi a5, a1, 256 +; CHECK-NEXT: sext.w a4, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: beqz a5, .LBB0_29 +; CHECK-NEXT: # %bb.28: # %bb +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_29: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_31 +; CHECK-NEXT: # %bb.30: # %bb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB0_31: # %bb +; CHECK-NEXT: andi a5, a1, 512 +; CHECK-NEXT: sext.w a4, a3 +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: beqz a5, .LBB0_33 +; CHECK-NEXT: # %bb.32: # %bb +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: .LBB0_33: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_35 +; CHECK-NEXT: # %bb.34: # %bb +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB0_35: # %bb +; CHECK-NEXT: andi a5, a1, 1024 +; CHECK-NEXT: sext.w a4, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: beqz a5, .LBB0_37 +; CHECK-NEXT: # %bb.36: # %bb +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_37: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_39 +; CHECK-NEXT: # %bb.38: # %bb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB0_39: # %bb +; CHECK-NEXT: slli a5, a1, 52 +; CHECK-NEXT: sext.w a4, a3 +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: bgez a5, .LBB0_41 +; CHECK-NEXT: # %bb.40: # %bb +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: .LBB0_41: # %bb +; CHECK-NEXT: blt a4, a0, .LBB0_43 +; CHECK-NEXT: # %bb.42: # %bb +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB0_43: # %bb +; 
CHECK-NEXT: slli a4, a1, 51 +; CHECK-NEXT: sext.w a3, a2 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: bltz a4, .LBB0_49 +; CHECK-NEXT: # %bb.44: # %bb +; CHECK-NEXT: bge a3, a0, .LBB0_50 +; CHECK-NEXT: .LBB0_45: # %bb +; CHECK-NEXT: sext.w a2, a1 +; CHECK-NEXT: blt a2, a0, .LBB0_47 +; CHECK-NEXT: .LBB0_46: # %bb +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB0_47: # %bb +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: # %bb.48: # %get_tx_mask.exit +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_49: # %bb +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: blt a3, a0, .LBB0_45 +; CHECK-NEXT: .LBB0_50: # %bb +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: sext.w a2, a2 +; CHECK-NEXT: bge a2, a0, .LBB0_46 +; CHECK-NEXT: j .LBB0_47 +._crit_edge.i: + %.in196.i = load i16, ptr null, align 2 + %i2 = load i16, ptr null, align 2 + %i3 = and i16 %i2, %.in196.i + %i9 = trunc nuw i8 0 to i1 + br i1 %i9, label %get_tx_mask.exit, label %bb + +bb: ; preds = %._crit_edge.i + %i13 = load i8, ptr null, align 1 + %i14 = icmp eq i8 %i13, 0 + %spec.select211.i = select i1 %i14, i16 0, i16 %i3 + %i19 = load i32, ptr null, align 4 + %i20 = zext i16 %spec.select211.i to i32 + %i21 = load i32, ptr null, align 4 + %i22 = icmp sgt i32 %i21, -1 + %i23 = and i32 %i20, 1 + %.not203.i = icmp eq i32 %i23, 0 + %spec.select212.i = select i1 %.not203.i, i32 -1, i32 %i21 + %.1174.i = select i1 %i22, i32 %spec.select212.i, i32 -1 + %i28 = icmp sgt i32 0, %.1174.i + %i29 = and i32 %i20, 2 + %.not203.1.not.i = icmp eq i32 %i29, 0 + %spec.select212.1.i = select i1 %.not203.1.not.i, i32 %.1174.i, i32 0 + %.1174.1.i = select i1 %i28, i32 %spec.select212.1.i, i32 %.1174.i + %i30 = load i32, ptr null, align 4 + %i31 = icmp sgt i32 %i30, %.1174.1.i + %i32 = and i32 %i20, 4 + %.not203.2.i = icmp eq i32 %i32, 0 + %spec.select212.2.i = select i1 %.not203.2.i, i32 %.1174.1.i, i32 %i30 + %.1174.2.i = select i1 %i31, i32 %spec.select212.2.i, i32 %.1174.1.i + %i36 = load i32, ptr null, align 4 + %i37 = icmp sgt i32 %i36, %.1174.2.i + %i38 = and i32 %i20, 8 + %.not203.3.i = icmp eq i32 %i38, 0 + %spec.select212.3.i = select i1 %.not203.3.i, i32 %.1174.2.i, i32 %i36 + %.1174.3.i = select i1 %i37, i32 %spec.select212.3.i, i32 %.1174.2.i + %i42 = load i32, ptr null, align 4 + %i43 = icmp sgt i32 %i42, %.1174.3.i + %i44 = and i32 %i20, 16 + %.not203.4.i = icmp eq i32 %i44, 0 + %spec.select212.4.i = select i1 %.not203.4.i, i32 %.1174.3.i, i32 %i42 + %.1174.4.i = select i1 %i43, i32 %spec.select212.4.i, i32 %.1174.3.i + %i48 = load i32, ptr null, align 4 + %i49 = icmp sgt i32 %i48, %.1174.4.i + %i50 = and i32 %i20, 32 + %.not203.5.i = icmp eq i32 %i50, 0 + %spec.select212.5.i = select i1 %.not203.5.i, i32 %.1174.4.i, i32 %i48 + %.1174.5.i = select i1 %i49, i32 %spec.select212.5.i, i32 %.1174.4.i + %i51 = load i32, ptr null, align 4 + %i52 = icmp sgt i32 %i51, %.1174.5.i + %i53 = and i32 %i20, 64 + %.not203.6.i = icmp eq i32 %i53, 0 + %spec.select212.6.i = select i1 %.not203.6.i, i32 %.1174.5.i, i32 %i51 + %.1174.6.i = select i1 %i52, i32 %spec.select212.6.i, i32 %.1174.5.i + %i56 = load i32, ptr null, align 4 + %i57 = icmp sgt i32 %i56, %.1174.6.i + %i58 = and i32 %i20, 128 + %.not203.7.i = icmp eq i32 %i58, 0 + %spec.select212.7.i = select i1 %.not203.7.i, i32 %.1174.6.i, i32 %i56 + %.1174.7.i = select i1 %i57, i32 %spec.select212.7.i, i32 %.1174.6.i + %i60 = load i32, ptr null, align 4 + %i61 = icmp sgt i32 %i60, %.1174.7.i + %i62 = and i32 %i20, 256 + %.not203.8.i = icmp eq i32 %i62, 0 + %spec.select212.8.i = select i1 %.not203.8.i, i32 %.1174.7.i, i32 %i60 + %.1174.8.i = select i1 
%i61, i32 %spec.select212.8.i, i32 %.1174.7.i + %i63 = load i32, ptr null, align 4 + %i64 = icmp sgt i32 %i63, %.1174.8.i + %i65 = and i32 %i20, 512 + %.not203.9.i = icmp eq i32 %i65, 0 + %spec.select212.9.i = select i1 %.not203.9.i, i32 %.1174.8.i, i32 %i63 + %.1174.9.i = select i1 %i64, i32 %spec.select212.9.i, i32 %.1174.8.i + %i67 = load i32, ptr null, align 4 + %i68 = icmp sgt i32 %i67, %.1174.9.i + %i69 = and i32 %i20, 1024 + %.not203.10.i = icmp eq i32 %i69, 0 + %spec.select212.10.i = select i1 %.not203.10.i, i32 %.1174.9.i, i32 %i67 + %.1174.10.i = select i1 %i68, i32 %spec.select212.10.i, i32 %.1174.9.i + %i70 = load i32, ptr null, align 4 + %i71 = icmp sgt i32 %i70, %.1174.10.i + %i72 = and i32 %i20, 2048 + %.not203.11.i = icmp eq i32 %i72, 0 + %spec.select212.11.i = select i1 %.not203.11.i, i32 %.1174.10.i, i32 %i70 + %.1174.11.i = select i1 %i71, i32 %spec.select212.11.i, i32 %.1174.10.i + %i75 = load i32, ptr null, align 4 + %i76 = icmp sgt i32 %i75, %.1174.11.i + %i77 = and i32 %i20, 4096 + %.not203.12.i = icmp eq i32 %i77, 0 + %spec.select212.12.i = select i1 %.not203.12.i, i32 %.1174.11.i, i32 %i75 + %.1174.12.i = select i1 %i76, i32 %spec.select212.12.i, i32 %.1174.11.i + %i80 = load i32, ptr null, align 4 + %i81 = icmp sgt i32 %i80, %.1174.12.i + %spec.select212.13.i = select i1 false, i32 %.1174.12.i, i32 %i80 + %.1174.13.i = select i1 %i81, i32 %spec.select212.13.i, i32 %.1174.12.i + %.1172.13.i = select i1 %i81, i32 13, i32 0 + %i84 = icmp sgt i32 0, %.1174.13.i + %.1172.14.i = select i1 %i84, i32 14, i32 %.1172.13.i + %i88 = icmp slt i32 0, %i19 + %i89 = select i1 %i88, i16 -32768, i16 0 + %i90 = zext i16 %i89 to i32 + %i91 = shl nuw nsw i32 1, %.1172.14.i + %i92 = and i32 %i91, %i90 + %.not200.i = icmp eq i32 %i92, 0 + %i93 = trunc nuw i32 %i91 to i16 + %i94 = xor i16 %i93, -1 + %i95 = select i1 %.not200.i, i16 -1, i16 %i94 + %.2177.i = and i16 %i95, %i89 + %i96 = xor i16 %.2177.i, -1 + %i97 = and i16 %spec.select211.i, %i96 + br label %get_tx_mask.exit + +get_tx_mask.exit: ; preds = %._crit_edge.i, %bb + %.1261.i = phi i16 [ %i97, %bb ], [ 0, %._crit_edge.i ] + %i99 = icmp eq i16 %.1261.i, 0 + %.2262.i = select i1 %i99, i16 0, i16 %.1261.i + ret void +} + +attributes #0 = { noimplicitfloat nounwind sspstrong uwtable vscale_range(2,1024) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" 
"target-features"="+64bit,+a,+b,+c,+d,+f,+m,+relax,+unaligned-scalar-mem,+unaligned-vector-mem,+v,+zaamo,+zalrsc,+zba,+zbb,+zbs,+zca,+zcd,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-p,-experimental-smctr,-experimental-ssctr,-experimental-svukte,-experimental-xqccmp,-experimental-xqcia,-experimental-xqciac,-experimental-xqcibi,-experimental-xqcibm,-experimental-xqcicli,-experimental-xqcicm,-experimental-xqcics,-experimental-xqcicsr,-experimental-xqciint,-experimental-xqciio,-experimental-xqcilb,-experimental-xqcili,-experimental-xqcilia,-experimental-xqcilo,-experimental-xqcilsm,-experimental-xqcisim,-experimental-xqcisls,-experimental-xqcisync,-experimental-xrivosvisni,-experimental-xrivosvizip,-experimental-xsfmclic,-experimental-xsfsclic,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-experimental-zvqdotq,-h,-q,-sdext,-sdtrig,-sha,-shcounterenw,-shgatpa,-shlcofideleg,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcntrpmf,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-xandesperf,-xandesvbfhcvt,-xandesvdot,-xandesvpackfph,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xmipscmov,-xmipslsp,-xsfcease,-xsfmm128t,-xsfmm16t,-xsfmm32a16f,-xsfmm32a32f,-xsfmm32a8f,-xsfmm32a8i,-xsfmm32t,-xsfmm64a64f,-xsfmm64t,-xsfmmbase,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zbc,-zbkb,-zbkc,-zbkx,-zcb,-zce,-zcf,-zclsd,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccamoc,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zilsd,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll index 96c349d..d166a6e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll @@ -92,6 +92,150 @@ entry: ret <vscale x 1 x i32> %va } +define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee2(<vscale x 1 x i32> %va) nounwind { +; SPILL-O2-LABEL: test_vector_callee2: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a0) # vscale x 
8-byte Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v1},~{v3},~{v5},~{v7},~{v24m2},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + + ret <vscale x 1 x i32> %va +} + +define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee3(<vscale x 1 x i32> %va) nounwind { +; SPILL-O2-LABEL: test_vector_callee3: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs2r.v v2, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs2r.v v26, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: 
slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl2r.v v26, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl4r.v v28, (a0) # vscale x 32-byte Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + call void asm sideeffect "", + "~{v1},~{v2},~{v3},~{v24},~{v26m2},~{v28m2},~{v29},~{v30},~{v31}"() + + ret <vscale x 1 x i32> %va +} + ; Make sure the local stack allocation pass doesn't count vector registers. The ; sizes are chosen to be on the edge of what RISCVRegister::needsFrameBaseReg ; considers to need a virtual base register. diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 807651c..dc80225 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -261,7 +261,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p ; CHECK-LABEL: vector_deinterleave_load_factor3: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg3e8.v v6, (a0) +; CHECK-NEXT: vlseg3e8.v v8, (a0) ; CHECK-NEXT: ret %vec = load <24 x i8>, ptr %p %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec) @@ -269,8 +269,8 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p %t1 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 1 %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2 %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0 - %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 0 - %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 0 + %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1 + %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2 ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2 } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 5747bbb..bd37443 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -554,9 +554,8 @@ define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, ptr %svp) { ; VLA-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; VLA-NEXT: vmv.v.i v10, 0 ; VLA-NEXT: vmv1r.v v0, v8 -; VLA-NEXT: vmerge.vim v8, v10, 1, v0 ; VLA-NEXT: vsetvli zero, zero, e8, mf4, tu, ma -; VLA-NEXT: vmv.v.v v9, v8 +; VLA-NEXT: vmerge.vim v9, v10, 1, v0 ; VLA-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; VLA-NEXT: vmsne.vi v0, v9, 0 ; VLA-NEXT: ret @@ -568,9 +567,8 @@ define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, ptr %svp) { ; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vmerge.vim v10, v9, 1, v0 ; VLS-NEXT: vmv1r.v v0, v8 -; VLS-NEXT: 
vmerge.vim v8, v9, 1, v0 ; VLS-NEXT: vsetvli zero, zero, e8, mf4, tu, ma -; VLS-NEXT: vmv.v.v v10, v8 +; VLS-NEXT: vmerge.vim v10, v9, 1, v0 ; VLS-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; VLS-NEXT: vmsne.vi v0, v10, 0 ; VLS-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index bdf344d..6eb0b69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -190,6 +190,62 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2(ptr %ptr) { ret {<4 x i32>, <4 x i32>} %res1 } +define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i1> %m) { +; CHECK-LABEL: vpload_factor2_interleaved_mask_intrinsic: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m) + %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8) + %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 + %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 + ret {<4 x i32>, <4 x i32>} %res1 +} + +define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) { +; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %interleaved.mask = shufflevector <4 x i1> %m, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> + %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8) + %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 + %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 + ret {<4 x i32>, <4 x i32>} %res1 +} + +define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle2(ptr %ptr, <2 x i1> %m) { +; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vwaddu.vv v9, v8, v8 +; CHECK-NEXT: vwmaccu.vx v9, a1, v8 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vmsne.vi v0, v9, 0 +; CHECK-NEXT: vle32.v v10, (a0), v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-NEXT: vnsrl.wx v9, v10, a0 +; CHECK-NEXT: ret + %interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> + %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 4) + %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %v1 = shufflevector 
<8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 + %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 + ret {<4 x i32>, <4 x i32>} %res1 +} define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) { ; CHECK-LABEL: vpload_factor3: @@ -423,8 +479,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: li a2, 32 ; RV32-NEXT: lui a3, 12 ; RV32-NEXT: lui a6, 12291 -; RV32-NEXT: lui a7, %hi(.LCPI20_0) -; RV32-NEXT: addi a7, a7, %lo(.LCPI20_0) +; RV32-NEXT: lui a7, %hi(.LCPI23_0) +; RV32-NEXT: addi a7, a7, %lo(.LCPI23_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a5) ; RV32-NEXT: vmv.s.x v0, a3 @@ -509,12 +565,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: lui a7, 49164 -; RV32-NEXT: lui a1, %hi(.LCPI20_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI20_1) +; RV32-NEXT: lui a1, %hi(.LCPI23_1) +; RV32-NEXT: addi a1, a1, %lo(.LCPI23_1) ; RV32-NEXT: lui t2, 3 ; RV32-NEXT: lui t1, 196656 -; RV32-NEXT: lui a4, %hi(.LCPI20_3) -; RV32-NEXT: addi a4, a4, %lo(.LCPI20_3) +; RV32-NEXT: lui a4, %hi(.LCPI23_3) +; RV32-NEXT: addi a4, a4, %lo(.LCPI23_3) ; RV32-NEXT: lui t0, 786624 ; RV32-NEXT: li a5, 48 ; RV32-NEXT: lui a6, 768 @@ -693,8 +749,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v24, v8, v2 -; RV32-NEXT: lui a1, %hi(.LCPI20_2) -; RV32-NEXT: addi a1, a1, %lo(.LCPI20_2) +; RV32-NEXT: lui a1, %hi(.LCPI23_2) +; RV32-NEXT: addi a1, a1, %lo(.LCPI23_2) ; RV32-NEXT: lui a3, 3073 ; RV32-NEXT: addi a3, a3, -1024 ; RV32-NEXT: vmv.s.x v0, a3 @@ -758,16 +814,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vrgatherei16.vv v28, v8, v3 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v28, v24 -; RV32-NEXT: lui a1, %hi(.LCPI20_4) -; RV32-NEXT: addi a1, a1, %lo(.LCPI20_4) -; RV32-NEXT: lui a2, %hi(.LCPI20_5) -; RV32-NEXT: addi a2, a2, %lo(.LCPI20_5) +; RV32-NEXT: lui a1, %hi(.LCPI23_4) +; RV32-NEXT: addi a1, a1, %lo(.LCPI23_4) +; RV32-NEXT: lui a2, %hi(.LCPI23_5) +; RV32-NEXT: addi a2, a2, %lo(.LCPI23_5) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v24, (a2) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI20_7) -; RV32-NEXT: addi a1, a1, %lo(.LCPI20_7) +; RV32-NEXT: lui a1, %hi(.LCPI23_7) +; RV32-NEXT: addi a1, a1, %lo(.LCPI23_7) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle16.v v10, (a1) ; RV32-NEXT: csrr a1, vlenb @@ -795,14 +851,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v16, v0, v10 -; RV32-NEXT: lui a1, %hi(.LCPI20_6) -; RV32-NEXT: addi a1, a1, %lo(.LCPI20_6) -; RV32-NEXT: lui a2, %hi(.LCPI20_8) -; RV32-NEXT: addi a2, a2, %lo(.LCPI20_8) +; RV32-NEXT: lui a1, %hi(.LCPI23_6) +; RV32-NEXT: addi a1, a1, %lo(.LCPI23_6) +; RV32-NEXT: lui a2, %hi(.LCPI23_8) +; RV32-NEXT: addi a2, a2, %lo(.LCPI23_8) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: 
vle16.v v4, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI20_9) -; RV32-NEXT: addi a1, a1, %lo(.LCPI20_9) +; RV32-NEXT: lui a1, %hi(.LCPI23_9) +; RV32-NEXT: addi a1, a1, %lo(.LCPI23_9) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v6, (a1) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma @@ -889,8 +945,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: li a4, 128 ; RV64-NEXT: lui a1, 1 ; RV64-NEXT: vle64.v v8, (a3) -; RV64-NEXT: lui a3, %hi(.LCPI20_0) -; RV64-NEXT: addi a3, a3, %lo(.LCPI20_0) +; RV64-NEXT: lui a3, %hi(.LCPI23_0) +; RV64-NEXT: addi a3, a3, %lo(.LCPI23_0) ; RV64-NEXT: vmv.s.x v0, a4 ; RV64-NEXT: csrr a4, vlenb ; RV64-NEXT: li a5, 61 @@ -1078,8 +1134,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t -; RV64-NEXT: lui a2, %hi(.LCPI20_1) -; RV64-NEXT: addi a2, a2, %lo(.LCPI20_1) +; RV64-NEXT: lui a2, %hi(.LCPI23_1) +; RV64-NEXT: addi a2, a2, %lo(.LCPI23_1) ; RV64-NEXT: li a3, 192 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV64-NEXT: vle16.v v6, (a2) @@ -1113,8 +1169,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vrgatherei16.vv v24, v16, v6 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui a2, %hi(.LCPI20_2) -; RV64-NEXT: addi a2, a2, %lo(.LCPI20_2) +; RV64-NEXT: lui a2, %hi(.LCPI23_2) +; RV64-NEXT: addi a2, a2, %lo(.LCPI23_2) ; RV64-NEXT: li a3, 1040 ; RV64-NEXT: vmv.s.x v0, a3 ; RV64-NEXT: addi a1, a1, -2016 @@ -1198,12 +1254,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui a1, %hi(.LCPI20_3) -; RV64-NEXT: addi a1, a1, %lo(.LCPI20_3) +; RV64-NEXT: lui a1, %hi(.LCPI23_3) +; RV64-NEXT: addi a1, a1, %lo(.LCPI23_3) ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV64-NEXT: vle16.v v20, (a1) -; RV64-NEXT: lui a1, %hi(.LCPI20_4) -; RV64-NEXT: addi a1, a1, %lo(.LCPI20_4) +; RV64-NEXT: lui a1, %hi(.LCPI23_4) +; RV64-NEXT: addi a1, a1, %lo(.LCPI23_4) ; RV64-NEXT: vle16.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 @@ -1254,8 +1310,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vrgatherei16.vv v0, v16, v8 -; RV64-NEXT: lui a1, %hi(.LCPI20_5) -; RV64-NEXT: addi a1, a1, %lo(.LCPI20_5) +; RV64-NEXT: lui a1, %hi(.LCPI23_5) +; RV64-NEXT: addi a1, a1, %lo(.LCPI23_5) ; RV64-NEXT: vle16.v v20, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 61 @@ -1472,6 +1528,19 @@ define void @vpstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { ret void } +define void @vpstore_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i1> %m) { +; CHECK-LABEL: vpstore_factor2_interleaved_mask_intrinsic: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsseg2e32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m) + %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> + tail call void 
@llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> %interleaved.mask, i32 8) + ret void +} + + define void @vpstore_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; CHECK-LABEL: vpstore_factor3: ; CHECK: # %bb.0: @@ -1559,6 +1628,24 @@ define void @vpstore_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> % ret void } +define void @vpstore_factor7_masked(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i1> %m) { +; CHECK-LABEL: vpstore_factor7_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsseg7e16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef, i32 undef> + %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13> + tail call void @llvm.vp.store.v14i16.p0(<14 x i16> %interleaved.vec, ptr %ptr, <14 x i1> %interleaved.mask, i32 14) + ret void +} + define void @vpstore_factor8(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i16> %v7) { ; CHECK-LABEL: vpstore_factor8: ; CHECK: # %bb.0: @@ -1757,8 +1844,9 @@ define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) { define void @vpstore_factor4_one_active(ptr %ptr, <4 x i32> %v) { ; CHECK-LABEL: vpstore_factor4_one_active: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vsseg4e32.v v8, (a0) +; CHECK-NEXT: vsse32.v v8, (a0), a1 ; CHECK-NEXT: ret %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef> tail call void @llvm.vp.store.v16i32.p0(<16 x i32> %v0, ptr %ptr, <16 x i1> splat (i1 true), i32 16) @@ -1782,7 +1870,7 @@ define void @store_factor4_one_active_fullwidth(ptr %ptr, <16 x i32> %v) { ; CHECK-LABEL: store_factor4_one_active_fullwidth: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vsse32.v v8, (a0), a1 ; CHECK-NEXT: ret %v0 = shufflevector <16 x i32> %v, <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef> @@ -1795,7 +1883,8 @@ define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsseg4e32.v v8, (a0) +; CHECK-NEXT: li a1, 16 
+; CHECK-NEXT: vsse32.v v8, (a0), a1 ; CHECK-NEXT: ret %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef> store <16 x i32> %v0, ptr %ptr @@ -1839,8 +1928,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) { ; RV32-NEXT: vle32.v v12, (a0), v0.t ; RV32-NEXT: li a0, 36 ; RV32-NEXT: vmv.s.x v20, a1 -; RV32-NEXT: lui a1, %hi(.LCPI54_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI54_0) +; RV32-NEXT: lui a1, %hi(.LCPI59_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0) ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle16.v v21, (a1) ; RV32-NEXT: vcompress.vm v8, v12, v11 @@ -1915,8 +2004,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) { ; RV32-NEXT: vmv.s.x v10, a0 ; RV32-NEXT: li a0, 146 ; RV32-NEXT: vmv.s.x v11, a0 -; RV32-NEXT: lui a0, %hi(.LCPI55_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0) +; RV32-NEXT: lui a0, %hi(.LCPI60_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI60_0) ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle16.v v20, (a0) ; RV32-NEXT: li a0, 36 @@ -1974,3 +2063,34 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) { %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2 } + +define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5(ptr %ptr) { +; CHECK-LABEL: maskedload_factor5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlseg5e32.v v8, (a0) +; CHECK-NEXT: ret + %interleaved.vec = tail call <20 x i32> @llvm.masked.load(ptr %ptr, i32 4, <20 x i1> splat (i1 true), <20 x i32> poison) + %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15> + %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16> + %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17> + %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18> + %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19> + %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 + %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 + %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 + %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3 + %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4 + ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4 +} + +define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { +; CHECK-LABEL: maskedstore_factor2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsseg2e32.v v8, (a0) +; CHECK-NEXT: ret + %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> + tail call void @llvm.masked.store(<8 x i32> %interleaved.vec, ptr %ptr, i32 4, <8 x i1> splat (i1 true)) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll index 
4eed3df..8c3ebb9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll @@ -1,107 +1,72 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple riscv64 -mattr=+zve64x,+zvl128b < %s | FileCheck %s -define <8 x i8> @load_factor2(ptr %ptr) { +define {<8 x i8>, <8 x i8>} @load_factor2(ptr %ptr) { ; CHECK-LABEL: load_factor2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8) - %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1 - ret <8 x i8> %3 + ret {<8 x i8>, <8 x i8>} %1 } -define <8 x i8> @load_factor3(ptr %ptr) { +define {<8 x i8>, <8 x i8>, <8 x i8>} @load_factor3(ptr %ptr) { ; CHECK-LABEL: load_factor3: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg3e8.v v6, (a0) +; CHECK-NEXT: vlseg3e8.v v8, (a0) ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - ret <8 x i8> %4 + ret { <8 x i8>, <8 x i8>, <8 x i8> } %1 } -define <8 x i8> @load_factor4(ptr %ptr) { +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor4(ptr %ptr) { ; CHECK-LABEL: load_factor4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg4e8.v v5, (a0) +; CHECK-NEXT: vlseg4e8.v v8, (a0) ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 - ret <8 x i8> %5 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 } -define <8 x i8> @load_factor5(ptr %ptr) { +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor5(ptr %ptr) { ; CHECK-LABEL: load_factor5: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg5e8.v v4, (a0) +; CHECK-NEXT: vlseg5e8.v v8, (a0) ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 - %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 4 - ret <8 x i8> %6 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 } -define <8 x i8> @load_factor6(ptr %ptr) { +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor6(ptr %ptr) { ; CHECK-LABEL: load_factor6: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg6e8.v v3, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret %1 = call { <8 
x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 - %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 4 - %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 5 - ret <8 x i8> %7 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 } -define <8 x i8> @load_factor7(ptr %ptr) { +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor7(ptr %ptr) { ; CHECK-LABEL: load_factor7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg7e8.v v2, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 - %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 4 - %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 5 - %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 6 - ret <8 x i8> %8 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 } -define <8 x i8> @load_factor8(ptr %ptr) { +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor8(ptr %ptr) { ; CHECK-LABEL: load_factor8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg8e8.v v1, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 - %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 4 - %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 5 - %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 6 - %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 7 - ret <8 x i8> %9 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 } diff 
--git a/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll index af2e8d3..42c2556 100644 --- a/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll +++ b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll @@ -14,12 +14,8 @@ define void @foo_lmul1() nounwind #0 { ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 1 ; CHECK-RV32-NEXT: sub sp, sp, a0 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; CHECK-RV32-NEXT: lui a0, %hi(a) ; CHECK-RV32-NEXT: addi a0, a0, %lo(a) ; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -31,12 +27,8 @@ define void @foo_lmul1() nounwind #0 { ; CHECK-RV32-NEXT: lui a0, %hi(c) ; CHECK-RV32-NEXT: addi a0, a0, %lo(c) ; CHECK-RV32-NEXT: vse32.v v8, (a0) -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 1 ; CHECK-RV32-NEXT: add sp, sp, a0 @@ -62,25 +54,8 @@ define void @foo_lmul2() nounwind #0 { ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 2 ; CHECK-RV32-NEXT: sub sp, sp, a0 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: slli a1, a0, 1 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; CHECK-RV32-NEXT: lui a0, %hi(d) ; CHECK-RV32-NEXT: addi a0, a0, %lo(d) ; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -92,25 +67,8 @@ define void @foo_lmul2() nounwind #0 { ; CHECK-RV32-NEXT: lui a0, %hi(f) ; CHECK-RV32-NEXT: addi a0, a0, %lo(f) ; CHECK-RV32-NEXT: vse32.v v8, (a0) -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: slli a1, a0, 1 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; 
CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 2 ; CHECK-RV32-NEXT: add sp, sp, a0 @@ -136,56 +94,8 @@ define void @foo_lmul4() nounwind #0 { ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: sub sp, sp, a0 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: slli a1, a0, 3 -; CHECK-RV32-NEXT: sub a0, a1, a0 -; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: slli a1, a0, 2 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: slli a1, a0, 1 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-NEXT: lui a0, %hi(g) ; CHECK-RV32-NEXT: addi a0, a0, %lo(g) ; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma @@ -197,50 +107,8 @@ define void @foo_lmul4() nounwind #0 { ; CHECK-RV32-NEXT: lui a0, %hi(i) ; CHECK-RV32-NEXT: addi a0, a0, %lo(i) ; CHECK-RV32-NEXT: vse32.v v8, (a0) -; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 3 -; CHECK-RV32-NEXT: sub a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; 
CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 2 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 1 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add sp, sp, a0 @@ -268,108 +136,12 @@ define void @foo_lmul8() nounwind #0 { ; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: sub sp, sp, a0 ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 4 -; CHECK-RV32-NEXT: sub a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a1, a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a1, a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a1, a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 
8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 3 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 3 -; CHECK-RV32-NEXT: sub a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 2 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 1 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-RV32-NEXT: lui a0, %hi(j) ; CHECK-RV32-NEXT: addi a0, a0, %lo(j) ; CHECK-RV32-NEXT: li a1, 32 @@ -383,108 +155,12 @@ define void @foo_lmul8() nounwind #0 { ; CHECK-RV32-NEXT: addi a0, a0, %lo(l) ; CHECK-RV32-NEXT: vse32.v v8, (a0) ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 4 -; CHECK-RV32-NEXT: sub a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a1, a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a1, a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; 
CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a1, a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 3 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 3 -; CHECK-RV32-NEXT: sub a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: mv a1, a0 -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 2 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 2 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a1, a0, 1 -; CHECK-RV32-NEXT: add a0, a1, a0 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll index 8963940..2c11bd1 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -360,13 +360,13 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind { ; ///////////////////////////////////////////////////////////////////////////// -define void @bzero_1(ptr %a) nounwind { -; RV32-BOTH-LABEL: bzero_1: +define void @memset_zero_1(ptr %a) nounwind { +; RV32-BOTH-LABEL: memset_zero_1: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: sb zero, 0(a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: bzero_1: +; RV64-BOTH-LABEL: memset_zero_1: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: sb zero, 0(a0) ; RV64-BOTH-NEXT: ret @@ -374,25 +374,25 @@ define void @bzero_1(ptr %a) nounwind { ret void } -define void @bzero_2(ptr %a) nounwind { -; RV32-LABEL: bzero_2: +define void @memset_zero_2(ptr %a) nounwind { +; RV32-LABEL: memset_zero_2: ; RV32: # %bb.0: ; RV32-NEXT: sb zero, 0(a0) ; RV32-NEXT: sb zero, 1(a0) ; RV32-NEXT: ret ; -; RV64-LABEL: bzero_2: +; RV64-LABEL: memset_zero_2: ; RV64: # %bb.0: ; RV64-NEXT: sb zero, 0(a0) ; RV64-NEXT: sb zero, 1(a0) ; RV64-NEXT: ret ; -; RV32-FAST-LABEL: bzero_2: +; RV32-FAST-LABEL: memset_zero_2: ; RV32-FAST: # %bb.0: ; RV32-FAST-NEXT: sh zero, 0(a0) ; RV32-FAST-NEXT: ret ; -; RV64-FAST-LABEL: bzero_2: +; RV64-FAST-LABEL: memset_zero_2: ; RV64-FAST: # %bb.0: ; RV64-FAST-NEXT: sh zero, 0(a0) ; RV64-FAST-NEXT: ret @@ -400,8 +400,8 @@ define void @bzero_2(ptr %a) nounwind { ret void } -define void @bzero_4(ptr %a) nounwind { -; RV32-LABEL: bzero_4: +define void @memset_zero_4(ptr %a) nounwind { +; RV32-LABEL: memset_zero_4: ; RV32: # %bb.0: ; RV32-NEXT: sb zero, 0(a0) ; RV32-NEXT: sb zero, 1(a0) @@ -409,7 +409,7 @@ define void @bzero_4(ptr %a) nounwind { ; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; -; RV64-LABEL: bzero_4: +; RV64-LABEL: memset_zero_4: ; RV64: # %bb.0: ; RV64-NEXT: sb zero, 0(a0) ; RV64-NEXT: sb zero, 1(a0) @@ -417,12 +417,12 @@ define void @bzero_4(ptr %a) nounwind { ; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; -; RV32-FAST-LABEL: bzero_4: +; RV32-FAST-LABEL: memset_zero_4: ; RV32-FAST: # %bb.0: ; RV32-FAST-NEXT: sw zero, 0(a0) ; RV32-FAST-NEXT: ret ; -; RV64-FAST-LABEL: bzero_4: +; RV64-FAST-LABEL: memset_zero_4: ; RV64-FAST: # %bb.0: ; RV64-FAST-NEXT: sw zero, 0(a0) ; RV64-FAST-NEXT: ret @@ -430,8 +430,8 @@ define void @bzero_4(ptr %a) nounwind { ret void } -define void @bzero_8(ptr %a) nounwind { -; RV32-LABEL: bzero_8: +define void @memset_zero_8(ptr %a) nounwind { +; RV32-LABEL: memset_zero_8: ; RV32: # %bb.0: ; RV32-NEXT: sb zero, 4(a0) ; RV32-NEXT: sb zero, 5(a0) @@ -443,7 +443,7 @@ define void @bzero_8(ptr %a) nounwind { ; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; -; RV64-LABEL: bzero_8: +; RV64-LABEL: memset_zero_8: ; RV64: # %bb.0: ; RV64-NEXT: sb zero, 4(a0) ; RV64-NEXT: sb zero, 5(a0) @@ -455,13 +455,13 @@ define void @bzero_8(ptr %a) nounwind { ; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; -; RV32-FAST-LABEL: bzero_8: +; RV32-FAST-LABEL: memset_zero_8: ; RV32-FAST: # %bb.0: ; RV32-FAST-NEXT: sw zero, 0(a0) ; RV32-FAST-NEXT: sw zero, 4(a0) ; RV32-FAST-NEXT: ret ; -; RV64-FAST-LABEL: bzero_8: +; RV64-FAST-LABEL: memset_zero_8: ; RV64-FAST: # %bb.0: ; RV64-FAST-NEXT: sd zero, 0(a0) ; RV64-FAST-NEXT: ret @@ -469,29 +469,29 @@ define void @bzero_8(ptr %a) nounwind { ret void } -define void @bzero_16(ptr %a) nounwind { -; RV32-LABEL: bzero_16: +define void @memset_zero_16(ptr %a) nounwind { +; RV32-LABEL: memset_zero_16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 ; 
RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: bzero_16: +; RV64-LABEL: memset_zero_16: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; -; RV32-FAST-LABEL: bzero_16: +; RV32-FAST-LABEL: memset_zero_16: ; RV32-FAST: # %bb.0: ; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-FAST-NEXT: vmv.v.i v8, 0 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; -; RV64-FAST-LABEL: bzero_16: +; RV64-FAST-LABEL: memset_zero_16: ; RV64-FAST: # %bb.0: ; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-FAST-NEXT: vmv.v.i v8, 0 @@ -501,8 +501,8 @@ define void @bzero_16(ptr %a) nounwind { ret void } -define void @bzero_32(ptr %a) nounwind { -; RV32-LABEL: bzero_32: +define void @memset_zero_32(ptr %a) nounwind { +; RV32-LABEL: memset_zero_32: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 @@ -511,7 +511,7 @@ define void @bzero_32(ptr %a) nounwind { ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: bzero_32: +; RV64-LABEL: memset_zero_32: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 @@ -520,7 +520,7 @@ define void @bzero_32(ptr %a) nounwind { ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; -; RV32-FAST-LABEL: bzero_32: +; RV32-FAST-LABEL: memset_zero_32: ; RV32-FAST: # %bb.0: ; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-FAST-NEXT: vmv.v.i v8, 0 @@ -529,7 +529,7 @@ define void @bzero_32(ptr %a) nounwind { ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; -; RV64-FAST-LABEL: bzero_32: +; RV64-FAST-LABEL: memset_zero_32: ; RV64-FAST: # %bb.0: ; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-FAST-NEXT: vmv.v.i v8, 0 @@ -541,8 +541,8 @@ define void @bzero_32(ptr %a) nounwind { ret void } -define void @bzero_64(ptr %a) nounwind { -; RV32-LABEL: bzero_64: +define void @memset_zero_64(ptr %a) nounwind { +; RV32-LABEL: memset_zero_64: ; RV32: # %bb.0: ; RV32-NEXT: li a1, 64 ; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -550,7 +550,7 @@ define void @bzero_64(ptr %a) nounwind { ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: bzero_64: +; RV64-LABEL: memset_zero_64: ; RV64: # %bb.0: ; RV64-NEXT: li a1, 64 ; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -558,14 +558,14 @@ define void @bzero_64(ptr %a) nounwind { ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; -; RV32-FAST-LABEL: bzero_64: +; RV32-FAST-LABEL: memset_zero_64: ; RV32-FAST: # %bb.0: ; RV32-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-FAST-NEXT: vmv.v.i v8, 0 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; -; RV64-FAST-LABEL: bzero_64: +; RV64-FAST-LABEL: memset_zero_64: ; RV64-FAST: # %bb.0: ; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-FAST-NEXT: vmv.v.i v8, 0 @@ -577,13 +577,13 @@ define void @bzero_64(ptr %a) nounwind { ; ///////////////////////////////////////////////////////////////////////////// -define void @aligned_bzero_2(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_2: +define void @aligned_memset_zero_2(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_2: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: sh zero, 0(a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_2: +; RV64-BOTH-LABEL: aligned_memset_zero_2: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: sh zero, 0(a0) ; RV64-BOTH-NEXT: ret @@ -591,13 +591,13 @@ define void @aligned_bzero_2(ptr %a) nounwind { ret void } -define void @aligned_bzero_4(ptr %a) nounwind { -; 
RV32-BOTH-LABEL: aligned_bzero_4: +define void @aligned_memset_zero_4(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_4: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: sw zero, 0(a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_4: +; RV64-BOTH-LABEL: aligned_memset_zero_4: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: sw zero, 0(a0) ; RV64-BOTH-NEXT: ret @@ -605,14 +605,14 @@ define void @aligned_bzero_4(ptr %a) nounwind { ret void } -define void @aligned_bzero_8(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_8: +define void @aligned_memset_zero_8(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_8: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: sw zero, 0(a0) ; RV32-BOTH-NEXT: sw zero, 4(a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_8: +; RV64-BOTH-LABEL: aligned_memset_zero_8: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: sd zero, 0(a0) ; RV64-BOTH-NEXT: ret @@ -621,15 +621,15 @@ define void @aligned_bzero_8(ptr %a) nounwind { } -define void @aligned_bzero_16(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_16: +define void @aligned_memset_zero_16(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_16: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_16: +; RV64-BOTH-LABEL: aligned_memset_zero_16: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 @@ -639,8 +639,8 @@ define void @aligned_bzero_16(ptr %a) nounwind { ret void } -define void @aligned_bzero_32(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_32: +define void @aligned_memset_zero_32(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_32: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 @@ -649,7 +649,7 @@ define void @aligned_bzero_32(ptr %a) nounwind { ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_32: +; RV64-BOTH-LABEL: aligned_memset_zero_32: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 @@ -661,15 +661,15 @@ define void @aligned_bzero_32(ptr %a) nounwind { ret void } -define void @aligned_bzero_64(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_64: +define void @aligned_memset_zero_64(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_64: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_64: +; RV64-BOTH-LABEL: aligned_memset_zero_64: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 @@ -679,8 +679,8 @@ define void @aligned_bzero_64(ptr %a) nounwind { ret void } -define void @aligned_bzero_66(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_66: +define void @aligned_memset_zero_66(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_66: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: sh zero, 64(a0) ; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma @@ -688,7 +688,7 @@ define void @aligned_bzero_66(ptr %a) nounwind { ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_66: +; RV64-BOTH-LABEL: aligned_memset_zero_66: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: sh zero, 64(a0) ; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma @@ -699,8 +699,8 @@ define void 
@aligned_bzero_66(ptr %a) nounwind { ret void } -define void @aligned_bzero_96(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_96: +define void @aligned_memset_zero_96(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_96: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 @@ -713,7 +713,7 @@ define void @aligned_bzero_96(ptr %a) nounwind { ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_96: +; RV64-BOTH-LABEL: aligned_memset_zero_96: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 @@ -729,15 +729,15 @@ define void @aligned_bzero_96(ptr %a) nounwind { ret void } -define void @aligned_bzero_128(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_128: +define void @aligned_memset_zero_128(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_128: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_128: +; RV64-BOTH-LABEL: aligned_memset_zero_128: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 @@ -747,8 +747,8 @@ define void @aligned_bzero_128(ptr %a) nounwind { ret void } -define void @aligned_bzero_256(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_256: +define void @aligned_memset_zero_256(ptr %a) nounwind { +; RV32-BOTH-LABEL: aligned_memset_zero_256: ; RV32-BOTH: # %bb.0: ; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 @@ -757,7 +757,7 @@ define void @aligned_bzero_256(ptr %a) nounwind { ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_256: +; RV64-BOTH-LABEL: aligned_memset_zero_256: ; RV64-BOTH: # %bb.0: ; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll index 648b47d..f93f88a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll @@ -9,27 +9,29 @@ define void @pr141907(ptr %0) nounwind { ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: addi a3, sp, 20 +; CHECK-NEXT: li a4, 12 ; CHECK-NEXT: .LBB0_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vs4r.v v8, (a2) ; CHECK-NEXT: vsetvli a1, a1, e8, mf8, ta, ma ; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma -; CHECK-NEXT: vnsrl.wi v11, v9, 0, v0.t -; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; CHECK-NEXT: vlseg3e32.v v8, (a2) +; CHECK-NEXT: vnsrl.wi v9, v8, 0, v0.t +; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; CHECK-NEXT: vlse32.v v8, (a3), a4 ; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma -; CHECK-NEXT: vsseg2e16.v v11, (zero) +; CHECK-NEXT: vsseg2e16.v v9, (zero) ; CHECK-NEXT: bnez a1, .LBB0_1 ; CHECK-NEXT: .LBB0_2: # %while.body5 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: vse16.v v8, (a0) ; 
CHECK-NEXT: j .LBB0_2 entry: br label %vector.body diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir index a050034..a7eaf39 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir @@ -78,12 +78,12 @@ body: | ; CHECK-NEXT: %false:vrnov0 = COPY $v9 ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ - ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 4, 5 /* e32 */, 0 /* tu, mu */ %pt:vrnov0 = COPY $v8 %false:vrnov0 = COPY $v9 %mask:vmv0 = COPY $v0 - %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ - %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */ + %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 8, 5 /* e32 */, 0 /* tu, mu */ + %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 4, 5 /* e32 */ ... --- # Shouldn't be converted because false operands are different @@ -163,3 +163,47 @@ body: | %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ bb.1: %5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */ +... +--- +# Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v +name: preserve_false +body: | + bb.0: + liveins: $v8, $v9, $v0, $x8, $x9 + ; CHECK-LABEL: name: preserve_false + ; CHECK: liveins: $v8, $v9, $v0, $x8, $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pt:vrnov0 = COPY $v8 + ; CHECK-NEXT: %false:vr = COPY $v9 + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %avl1:gprnox0 = COPY $x8 + ; CHECK-NEXT: %avl2:gprnox0 = COPY $x9 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */ + %pt:vrnov0 = COPY $v8 + %false:vr = COPY $v9 + %mask:vmv0 = COPY $v0 + %avl1:gprnox0 = COPY $x8 + %avl2:gprnox0 = COPY $x9 + %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */ + %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */ +... +--- +# But we can convert this one because vmerge's avl being <= true's means we don't lose any false elements past avl. 
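+# Illustrative sketch (not FileCheck-ed; avlT/avlM are ad-hoc names for the two AVL operands):
+#   %true = PseudoVADD_VV_M1_MASK %false, ..., %mask, avlT, ...
+#   %x    = PseudoVMERGE_VVM_M1   %pt, %false, %true, %mask, avlM, ...
+# can collapse into
+#   %x    = PseudoVMV_V_V_M1      %pt, %true, avlM, ...
+# once %false is folded into %true's passthru, since the masked-off lanes of
+# %true then already hold %false. That is only sound when avlM <= avlT:
+# otherwise the lanes in [avlT, avlM) are tail lanes of %true, and only the
+# vmerge would have refilled them from %false.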
+name: preserve_false_avl_known_le +body: | + bb.0: + liveins: $v8, $v9, $v0 + ; CHECK-LABEL: name: preserve_false_avl_known_le + ; CHECK: liveins: $v8, $v9, $v0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pt:vr = COPY $v8 + ; CHECK-NEXT: %false:vrnov0 = COPY $v9 + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */ + %pt:vrnov0 = COPY $v8 + %false:vr = COPY $v9 + %mask:vmv0 = COPY $v0 + %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */ + %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */ diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll index 3aeb4e8..9ffc84a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll @@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64> ret <vscale x 8 x i64> %1 } -declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32) -declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32) -declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32) -declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32) -declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32) -declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32) -declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32) +; Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v +define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) { +; CHECK-LABEL: preserve_false: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vle32.v v10, (a0), v0.t +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 +; CHECK-NEXT: ret + %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3) + %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2) + ret <vscale x 2 x i32> %res +} + +; Can fold this because its avl is known to be <= true's, so no elements from false need to be introduced past avl.
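+; Roughly, reading the two calls below (illustrative comment, not checked output):
+;   %true = vle32 with passthru %false, mask %mask, avl 2
+;   %res  = vmerge(%pt, %false, %true, %mask, avl 1)
+; With the vmerge's avl (1) <= %true's avl (2), every lane the vmerge reads is
+; defined in %true, and %true's masked-off lanes can be chosen to equal %false,
+; so the merge reduces to a tail-undisturbed vmv.v.v of %true into %pt at avl 1.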
+define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) { +; CHECK-LABEL: preserve_false_avl_known_le: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v9, (a0), v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3) + %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1) + ret <vscale x 2 x i32> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 96a7b14..c4284bf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -372,7 +372,7 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deint ; CHECK-LABEL: vector_deinterleave_load_factor3: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vlseg3e8.v v6, (a0) +; CHECK-NEXT: vlseg3e8.v v8, (a0) ; CHECK-NEXT: ret %vec = load <vscale x 24 x i8>, ptr %p %d0 = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave3(<vscale x 24 x i8> %vec) @@ -380,8 +380,8 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deint %t1 = extractvalue {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %d0, 1 %t2 = extractvalue {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %d0, 2 %res0 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } poison, <vscale x 8 x i8> %t0, 0 - %res1 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res0, <vscale x 8 x i8> %t1, 0 - %res2 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res1, <vscale x 8 x i8> %t2, 0 + %res1 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res0, <vscale x 8 x i8> %t1, 1 + %res2 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res1, <vscale x 8 x i8> %t2, 2 ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res2 } @@ -407,8 +407,9 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vlseg4e8.v v8, (a0) +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vlse8.v v8, (a0), a1 ; CHECK-NEXT: ret %vec = load <vscale x 32 x i8>, ptr %p %d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec) @@ -419,8 +420,10 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) { define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vlseg4e8.v v5, (a0) +; CHECK-NEXT: addi a0, a0, 3 +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vlse8.v v8, (a0), a1 ; CHECK-NEXT: ret %vec = load <vscale x 32 x i8>, ptr %p 
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec) @@ -550,8 +553,8 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) { ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %deinterleaved.results } -define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4(ptr %p) { -; CHECK-LABEL: masked_loat_factor4: +define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4(ptr %p) { +; CHECK-LABEL: masked_load_factor4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vlseg4e8.v v8, (a0) @@ -561,8 +564,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results } -define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) { -; CHECK-LABEL: masked_loat_factor4_mask: +define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) { +; CHECK-LABEL: masked_load_factor4_mask: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t @@ -575,8 +578,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i ; Negative test - some of the deinterleaved elements might come from the ; passthru not the load -define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) { -; CHECK-LABEL: masked_loat_factor4_passthru: +define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) { +; CHECK-LABEL: masked_load_factor4_passthru: ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -634,3 +637,19 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec) ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results } + +define { <8 x float>, <8 x float> } @deinterleave_unrelated(<16 x float> %arg) { +; CHECK-LABEL: deinterleave_unrelated: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wx v10, v12, a0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: ret +entry: + %abs = call <16 x float> @llvm.fabs(<16 x float> %arg) + %res = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %abs) + ret { <8 x float>, <8 x float> } %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 0a96e4f..ac9f263 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -3712,8 +3712,9 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive(<vsca ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: 
vs4r.v v8, (a0) -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vlseg8e32.v v8, (a0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: vlse32.v v8, (a0), a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add sp, sp, a0 @@ -3732,9 +3733,11 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vsc ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: addi a1, sp, 36 ; CHECK-NEXT: vs4r.v v8, (a0) -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vlseg8e32.v v3, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: vlse32.v v8, (a1), a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add sp, sp, a0 @@ -3744,3 +3747,61 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vsc %ext = extractvalue {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>} %res, 5 ret <vscale x 1 x float> %ext } + + +define { <8 x float>, <8 x float> } @interleave_deinterleave2(<8 x float> %a, <8 x float> %b) { +; V-LABEL: interleave_deinterleave2: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; V-NEXT: vwaddu.vv v12, v8, v10 +; V-NEXT: li a0, -1 +; V-NEXT: vwmaccu.vx v12, a0, v10 +; V-NEXT: li a0, 32 +; V-NEXT: vnsrl.wx v10, v12, a0 +; V-NEXT: vnsrl.wi v8, v12, 0 +; V-NEXT: ret +; +; ZIP-LABEL: interleave_deinterleave2: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZIP-NEXT: vmv2r.v v12, v10 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v12 +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: vnsrl.wx v10, v16, a0 +; ZIP-NEXT: vnsrl.wi v8, v16, 0 +; ZIP-NEXT: ret +entry: + %0 = call <16 x float> @llvm.vector.interleave2.v16f32(<8 x float> %a, <8 x float> %b) + %1 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %0) + ret { <8 x float>, <8 x float> } %1 +} + +define <16 x float> @deinterleave_interleave2(<16 x float> %arg) { +; V-LABEL: deinterleave_interleave2: +; V: # %bb.0: # %entry +; V-NEXT: li a0, 32 +; V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; V-NEXT: vnsrl.wi v12, v8, 0 +; V-NEXT: vnsrl.wx v14, v8, a0 +; V-NEXT: vwaddu.vv v8, v12, v14 +; V-NEXT: li a0, -1 +; V-NEXT: vwmaccu.vx v8, a0, v14 +; V-NEXT: ret +; +; ZIP-LABEL: deinterleave_interleave2: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: vnsrl.wi v12, v8, 0 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vnsrl.wx v16, v8, a0 +; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v8, v12, v16 +; ZIP-NEXT: ret +entry: + %0 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %arg) + %a = extractvalue { <8 x float>, <8 x float> } %0, 0 + %b = extractvalue { <8 x float>, <8 x float> } %0, 1 + %res = call <16 x float> @llvm.vector.interleave2.v16f32(<8 x float> %a, <8 x float> %b) + ret <16 x float> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll index 3dc83d5..38d38f7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll @@ -1636,3 +1636,49 @@ define <8 x half> @vector_interleave8_v8f16_v1f16(<1 x half> %a, <1 x half> %b, %res = call <8 x 
half> @llvm.vector.interleave8.v8f16(<1 x half> %a, <1 x half> %b, <1 x half> %c, <1 x half> %d, <1 x half> %e, <1 x half> %f, <1 x half> %g, <1 x half> %h) ret <8 x half> %res } + +define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) { +; CHECK-LABEL: interleave4_const_splat_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave4_const_splat_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vmv.v.i v8, 3 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: interleave4_const_splat_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: vmv.v.i v8, 3 +; ZIP-NEXT: ret + %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3)) + ret <8 x i16> %retval +} + +define <8 x i16> @interleave4_same_nonconst_splat_v8i16(i16 %a) { +; CHECK-LABEL: interleave4_same_nonconst_splat_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave4_same_nonconst_splat_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vmv.v.x v8, a0 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: interleave4_same_nonconst_splat_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: vmv.v.x v8, a0 +; ZIP-NEXT: ret + %ins = insertelement <2 x i16> poison, i16 %a, i32 0 + %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> zeroinitializer + %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat) + ret <8 x i16> %retval +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index af55aaa..2e2f12a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -303,3 +303,62 @@ define void @vector_interleave_store_factor8(<vscale x 2 x i32> %a, <vscale x 2 store <vscale x 16 x i32> %v, ptr %p ret void } + +define void @masked_store_factor3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, ptr %p) { +; CHECK-LABEL: masked_store_factor3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg3e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) + call void @llvm.masked.store(<vscale x 6 x i32> %v, ptr %p, i32 4, <vscale x 6 x i1> splat (i1 true)) + ret void +} + +define void @masked_store_factor3_masked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, ptr %p, <vscale x 2 x i1> %m) { +; CHECK-LABEL: masked_store_factor3_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg3e32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %interleaved.mask = call <vscale x 6 x i1> @llvm.vector.interleave3(<vscale x 2 x i1> %m, <vscale x 2 x i1> %m, <vscale x 2 x i1> %m) + %v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) + call void @llvm.masked.store(<vscale x 6 x i32> %v, ptr %p, i32 4, <vscale x 6 x i1> %interleaved.mask) + ret void +} + +define void @store_factor2_oneactive(<vscale x 2 x i32> %a, ptr %p) { +; CHECK-LABEL: store_factor2_oneactive: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg2e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call <vscale x 4 x i32> @llvm.vector.interleave2(<vscale x 2 x i32> %a, <vscale x 2 x i32> poison) + store <vscale x 4 x i32> %v, ptr %p + ret void +} + +define void @store_factor3_oneactive(<vscale x 2 x i32> %a, ptr %p) { +; CHECK-LABEL: store_factor3_oneactive: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vsse32.v v8, (a0), a1 +; CHECK-NEXT: ret + %v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> poison, <vscale x 2 x i32> poison) + store <vscale x 6 x i32> %v, ptr %p + ret void +} + +define void @store_factor7_oneactive(<vscale x 2 x i32> %a, ptr %p) { +; CHECK-LABEL: store_factor7_oneactive: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 24 +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vsse32.v v8, (a0), a1 +; CHECK-NEXT: ret + %v = call <vscale x 14 x i32> @llvm.vector.interleave7(<vscale x 2 x i32> poison, <vscale x 2 x i32> poison, <vscale x 2 x i32> poison, <vscale x 2 x i32> poison, <vscale x 2 x i32> poison, <vscale x 2 x i32> poison, <vscale x 2 x i32> %a) + store <vscale x 14 x i32> %v, ptr %p + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index 01cc5c5..ee38257 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -14947,3 +14947,147 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv2f64(<vscale x 2 x %res = call <vscale x 16 x double> @llvm.vector.interleave8.nxv16f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x double> %v4, <vscale x 2 x double> %v5, <vscale x 2 x double> %v6, <vscale x 2 x double> %v7) ret <vscale x 16 x double> %res } + +define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() { +; CHECK-LABEL: interleave2_same_const_splat_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave2_same_const_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVBB-NEXT: vmv.v.i v8, 3 +; ZVBB-NEXT: ret + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3)) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() { +; V-LABEL: interleave2_diff_const_splat_nxv4i16: +; V: # %bb.0: +; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; V-NEXT: vmv.v.i v9, 3 +; V-NEXT: li a0, 4 +; V-NEXT: vmv.v.i v10, -1 +; V-NEXT: vwaddu.vx v8, v9, a0 +; V-NEXT: vwmaccu.vx v8, a0, v10 +; V-NEXT: csrr a0, vlenb +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; V-NEXT: vslidedown.vx v9, v8, a0 +; V-NEXT: vslideup.vx v8, v9, a0 +; V-NEXT: ret +; +; ZVBB-LABEL: interleave2_diff_const_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vmv.v.i v8, 4 +; ZVBB-NEXT: li a0, 3 +; ZVBB-NEXT: vwsll.vi v9, v8, 16 +; ZVBB-NEXT: vwaddu.wx v8, v9, a0 +; ZVBB-NEXT: csrr a0, vlenb +; ZVBB-NEXT: srli a0, a0, 2 +; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVBB-NEXT: vslidedown.vx v9, v8, a0 +; ZVBB-NEXT: vslideup.vx v8, v9, a0 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: 
interleave2_diff_const_splat_nxv4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZIP-NEXT: vmv.v.i v9, 4 +; ZIP-NEXT: vmv.v.i v10, 3 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: ri.vzip2b.vv v11, v10, v9 +; ZIP-NEXT: ri.vzip2a.vv v8, v10, v9 +; ZIP-NEXT: srli a0, a0, 2 +; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZIP-NEXT: vslideup.vx v8, v11, a0 +; ZIP-NEXT: ret + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.v4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4)) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) { +; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave2_same_nonconst_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVBB-NEXT: vmv.v.x v8, a0 +; ZVBB-NEXT: ret + %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0 + %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) { +; V-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; V: # %bb.0: +; V-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; V-NEXT: vmv.v.x v9, a0 +; V-NEXT: vmv.v.i v10, -1 +; V-NEXT: csrr a0, vlenb +; V-NEXT: vwaddu.vx v8, v9, a1 +; V-NEXT: vwmaccu.vx v8, a1, v10 +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; V-NEXT: vslidedown.vx v9, v8, a0 +; V-NEXT: vslideup.vx v8, v9, a0 +; V-NEXT: ret +; +; ZVBB-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vmv.v.x v8, a1 +; ZVBB-NEXT: csrr a1, vlenb +; ZVBB-NEXT: vwsll.vi v9, v8, 16 +; ZVBB-NEXT: vwaddu.wx v8, v9, a0 +; ZVBB-NEXT: srli a1, a1, 2 +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVBB-NEXT: vslidedown.vx v9, v8, a1 +; ZVBB-NEXT: vslideup.vx v8, v9, a1 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZIP-NEXT: vmv.v.x v9, a0 +; ZIP-NEXT: vmv.v.x v10, a1 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: ri.vzip2b.vv v11, v9, v10 +; ZIP-NEXT: ri.vzip2a.vv v8, v9, v10 +; ZIP-NEXT: srli a0, a0, 2 +; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZIP-NEXT: vslideup.vx v8, v11, a0 +; ZIP-NEXT: ret + %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0 + %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() { +; CHECK-LABEL: interleave4_same_const_splat_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave4_same_const_splat_nxv8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; 
ZVBB-NEXT: vmv.v.i v8, 3 +; ZVBB-NEXT: ret + %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3)) + ret <vscale x 8 x i16> %retval +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 4883a4d..dbe0ecc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -1,3159 +1,1907 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -verify-machineinstrs | FileCheck %s ; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer define <vscale x 4 x i32> @vadd_vi(<vscale x 4 x i32> %a, iXLen %vl) { -; NOVLOPT-LABEL: vadd_vi: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vi v10, v8, 5 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vadd_vi: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vi v10, v8, 5 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vadd_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vadd.vi v10, v8, 5 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vadd_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vadd_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vadd_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> 
@llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vadd_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vadd_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vadd_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vadd.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vsub_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vsub.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vsub_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vsub.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vsub_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vsub_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vsub.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vsub_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vsub.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vsub_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vrsub_vi(<vscale x 4 x i32> %a, iXLen %vl) { -; NOVLOPT-LABEL: vrsub_vi: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vrsub.vi v10, v8, 5 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vrsub_vi: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, 
a0, e32, m2, ta, ma
-; VLOPT-NEXT: vrsub.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vrsub_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vrsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vrsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vrsub_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vrsub.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vrsub_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vrsub.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vrsub_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vrsub.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vrsub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vand_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vand_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vand.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vand_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vand.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vand_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vand.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vand.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vand_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vand_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vand.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vand_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vand.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vand_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vand.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vand_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vand_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vand.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vand_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vand.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vand_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vand.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vor_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vor_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vor.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vor_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vor.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vor_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vor.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vor.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vor_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vor_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vor.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vor_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vor.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vor_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vor.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vor_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vor_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vor.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vor_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vor.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vor_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vor.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vor.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vxor_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vxor_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vxor.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vxor_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vxor.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vxor_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vxor.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vxor.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vxor_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vxor_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vxor.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vxor_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vxor.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vxor_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vxor.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vxor.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vxor_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vxor_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vxor.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vxor_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vxor.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vxor_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vxor.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vxor.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsll_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vsll_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsll.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsll_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsll.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsll_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsll.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsll_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsll_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsll.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsll_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsll.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsll_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsll.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsll_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsll_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsll.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsll_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vsll.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsll_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsll.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsll.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i64> @vwaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwaddu.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwaddu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwaddu.vv v12, v8, v10
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwaddu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i32> @vsrl_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vsrl_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsrl.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsrl_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsrl.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsrl_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsrl_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsrl_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsrl.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsrl_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsrl.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsrl_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsrl_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsrl_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsrl.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsrl_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vsrl.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsrl_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsra_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vsra_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsra.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsra_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsra.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsra_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsra.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsra_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsra_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsra.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsra_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsra.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsra_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsra_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsra_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsra.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsra_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vsra.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsra_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsra.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i64> @vwaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwaddu.vx v12, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwaddu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwaddu.vx v12, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwaddu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwaddu.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsubu.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsubu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwsubu.vv v12, v8, v10
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsubu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwsubu.vv v12, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsubu.vx v12, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsubu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwsubu.vx v12, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsubu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwsubu.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwadd.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwadd_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwadd.vv v12, v8, v10
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwadd_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwadd.vv v12, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwadd.vx v12, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwadd_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwadd.vx v12, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwadd_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwadd.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsub.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsub_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwsub.vv v12, v8, v10
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsub_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwsub.vv v12, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsub.vx v12, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsub_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwsub.vx v12, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsub_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwsub.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwaddu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu_wv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwaddu.wv v8, v8, v12
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwaddu_wv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwaddu.wv v8, v8, v12
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwaddu_wv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwaddu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwaddu_wx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwaddu.wx v8, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwaddu_wx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwaddu.wx v8, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwaddu_wx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwaddu.wx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.xv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsubu_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu_wv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsubu.wv v8, v8, v12
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsubu_wv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwsubu.wv v8, v8, v12
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsubu_wv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsubu_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsubu_wx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsubu.wx v8, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsubu_wx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwsubu.wx v8, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsubu_wx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwsubu.wx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwadd_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd_wv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwadd.wv v8, v8, v12
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwadd_wv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwadd.wv v8, v8, v12
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwadd_wv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwadd.wv v8, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwadd_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwadd_wx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwadd.wx v8, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwadd_wx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwadd.wx v8, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwadd_wx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwadd.wx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsub_wv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub_wv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsub.wv v8, v8, v12
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsub_wv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vwsub.wv v8, v8, v12
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsub_wv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vwsub.wv v8, v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i64> @vwsub_wx(<vscale x 4 x i64> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsub_wx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vwsub.wx v8, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsub_wx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vwsub.wx v8, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsub_wx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vwsub.wx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i32> @vsext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsext_vf2:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsext.vf2 v12, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsext_vf2:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsext.vf2 v12, v8
-; VLOPT-NEXT: vadd.vv v8, v12, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsext_vf2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vadd.vv v8, v12, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vsext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsext_vf4:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsext.vf4 v12, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsext_vf4:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsext.vf4 v12, v8
-; VLOPT-NEXT: vadd.vv v8, v12, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsext_vf4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsext.vf4 v12, v8
+; CHECK-NEXT: vadd.vv v8, v12, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i8(<vscale x 4 x i32> poison, <vscale x 4 x i8> %a, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i64> @vsext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsext_vf8:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; NOVLOPT-NEXT: vsext.vf8 v16, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v16, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsext_vf8:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; VLOPT-NEXT: vsext.vf8 v16, v8
-; VLOPT-NEXT: vadd.vv v8, v16, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsext_vf8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsext.vf8 v16, v8
+; CHECK-NEXT: vadd.vv v8, v16, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i32.nxv4i8(<vscale x 4 x i64> poison, <vscale x 4 x i8> %a, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %b, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i32> @vzext_vf2(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vzext_vf2:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vzext.vf2 v12, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vzext_vf2:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vzext.vf2 v12, v8
-; VLOPT-NEXT: vadd.vv v8, v12, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vzext_vf2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vadd.vv v8, v12, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vzext_vf4(<vscale x 4 x i8> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vzext_vf4:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vzext.vf4 v12, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vzext_vf4:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vzext.vf4 v12, v8
-; VLOPT-NEXT: vadd.vv v8, v12, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vzext_vf4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vzext.vf4 v12, v8
+; CHECK-NEXT: vadd.vv v8, v12, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i8(<vscale x 4 x i32> poison, <vscale x 4 x i8> %a, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i64> @vzext_vf8(<vscale x 4 x i8> %a, <vscale x 4 x i64> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vzext_vf8:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; NOVLOPT-NEXT: vzext.vf8 v16, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v16, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vzext_vf8:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; VLOPT-NEXT: vzext.vf8 v16, v8
-; VLOPT-NEXT: vadd.vv v8, v16, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vzext_vf8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf8 v16, v8
+; CHECK-NEXT: vadd.vv v8, v16, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i32.nxv4i8(<vscale x 4 x i64> poison, <vscale x 4 x i8> %a, iXLen -1)
  %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %b, iXLen %vl)
  ret <vscale x 4 x i64> %2
}
define <vscale x 4 x i1> @vmadc_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmadc_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmadc.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmadc_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmadc.vi v10, v8, 5
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmadc_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmadc.vi v10, v8, 5
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmadc_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmadc_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmadc.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmadc_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmadc.vx v10, v8, a0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmadc_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmadc.vx v10, v8, a0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmadc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmadc_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmadc.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmadc_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmadc.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmadc_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmadc.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmadc_vim(<vscale x 4 x i32> %a, <vscale x 4 x i1> %mask, <vscale x 4 x i1> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmadc_vim:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmadc.vim v11, v8, 5, v0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmadc_vim:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmadc.vim v11, v8, 5, v0
-; VLOPT-NEXT: vmand.mm v0, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmadc_vim:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmadc.vim v11, v8, 5, v0
+; CHECK-NEXT: vmand.mm v0, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, <vscale x 4 x i1> %mask, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmadc_vxm(<vscale x 4 x i32> %a, <vscale x 4 x i1> %mask, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmadc_vxm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmadc.vxm v11, v8, a0, v0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmadc_vxm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmadc.vxm v11, v8, a0, v0
-; VLOPT-NEXT: vmand.mm v0, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmadc_vxm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmadc.vxm v11, v8, a0, v0
+; CHECK-NEXT: vmand.mm v0, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, <vscale x 4 x i1> %mask, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmadc_vvm(<vscale x 4 x i32> %a, <vscale x 4 x i1> %mask, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmadc_vvm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmadc.vvm v11, v8, v12, v0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmadc_vvm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmadc.vvm v11, v8, v12, v0
-; VLOPT-NEXT: vmand.mm v0, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmadc_vvm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmadc.vvm v11, v8, v12, v0
+; CHECK-NEXT: vmand.mm v0, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, <vscale x 4 x i1> %mask, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsbc_vvm(<vscale x 4 x i32> %a, <vscale x 4 x i1> %mask, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsbc_vvm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsbc.vvm v11, v8, v12, v0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsbc_vvm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmsbc.vvm v11, v8, v12, v0
-; VLOPT-NEXT: vmand.mm v0, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsbc_vvm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmsbc.vvm v11, v8, v12, v0
+; CHECK-NEXT: vmand.mm v0, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, <vscale x 4 x i1> %mask, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsbc_vxm(<vscale x 4 x i32> %a, <vscale x 4 x i1> %mask, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsbc_vxm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsbc.vxm v11, v8, a0, v0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsbc_vxm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmsbc.vxm v11, v8, a0, v0
-; VLOPT-NEXT: vmand.mm v0, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsbc_vxm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmsbc.vxm v11, v8, a0, v0
+; CHECK-NEXT: vmand.mm v0, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, <vscale x 4 x i1> %mask, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsbc_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsbc_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsbc.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsbc_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmsbc.vx v10, v8, a0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsbc_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmsbc.vx v10, v8, a0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsbc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsbc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsbc_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsbc.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsbc_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmsbc.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsbc_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmsbc.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsbc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i16> @vnsrl_wi(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnsrl_wi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vnsrl.wi v11, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnsrl_wi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT: vnsrl.wi v11, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnsrl_wi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v11, v8, 5
+; CHECK-NEXT: vadd.vv v8, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
  %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
  ret <vscale x 4 x i16> %2
}
define <vscale x 4 x i16> @vnsrl_wx(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, iXLen %c, iXLen %vl) {
-; NOVLOPT-LABEL: vnsrl_wx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vnsrl.wx v11, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnsrl_wx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT: vnsrl.wx v11, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnsrl_wx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v11, v8, a0
+; CHECK-NEXT: vadd.vv v8, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen %c, iXLen -1)
  %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
  ret <vscale x 4 x i16> %2
}
define <vscale x 4 x i16> @vnsrl_wv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vnsrl_wv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vnsrl.wv v12, v8, v11
-; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnsrl_wv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT: vnsrl.wv v12, v8, v11
-; VLOPT-NEXT: vadd.vv v8, v12, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnsrl_wv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wv v12, v8, v11
+; CHECK-NEXT: vadd.vv v8, v12, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, <vscale x 4 x i16> %c, iXLen -1)
  %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
  ret <vscale x 4 x i16> %2
}
define <vscale x 4 x i16> @vnsra_wi(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnsra_wi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vnsra.wi v11, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnsra_wi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT: vnsra.wi v11, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnsra_wi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vnsra.wi v11, v8, 5
+; CHECK-NEXT: vadd.vv v8, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
  %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
  ret <vscale x 4 x i16> %2
}
define <vscale x 4 x i16> @vnsra_wx(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, iXLen %c, iXLen %vl) {
-; NOVLOPT-LABEL: vnsra_wx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vnsra.wx v11, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v11, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnsra_wx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT: vnsra.wx v11, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v11, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnsra_wx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vnsra.wx v11, v8, a0
+; CHECK-NEXT: vadd.vv v8, v11, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen %c, iXLen -1)
  %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
  ret <vscale x 4 x i16> %2
}
define <vscale x 4 x i16> @vnsra_wv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vnsra_wv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vnsra.wv v12, v8, v11
-; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnsra_wv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT: vnsra.wv v12, v8, v11
-; VLOPT-NEXT: vadd.vv v8, v12, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnsra_wv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vnsra.wv v12, v8, v11
+; CHECK-NEXT: vadd.vv v8, v12, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, <vscale x 4 x i16> %c, iXLen -1)
  %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
  ret <vscale x 4 x i16> %2
}
define <vscale x 4 x i1> @vmseq_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmseq_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmseq.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmseq_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmseq.vi v10, v8, 5
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmseq_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmseq.vi v10, v8, 5
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmseq_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmseq_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmseq.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmseq_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmseq.vx v10, v8, a0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmseq_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmseq.vx v10, v8, a0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmseq_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmseq_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmseq.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmseq_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmseq.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmseq_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmseq.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsne_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmsne_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsne.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsne_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmsne.vi v10, v8, 5
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsne_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmsne.vi v10, v8, 5
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsne_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsne_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsne.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsne_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmsne.vx v10, v8, a0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsne_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmsne.vx v10, v8, a0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsne_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsne_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsne.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsne_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmsne.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsne_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmsne.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsltu_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsltu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsltu.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsltu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmsltu.vx v10, v8, a0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsltu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmsltu.vx v10, v8, a0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsltu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsltu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsltu.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsltu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmsltu.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsltu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmsltu.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmslt_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmslt_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmslt.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmslt_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmslt.vx v10, v8, a0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmslt_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vx v10, v8, a0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmslt_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmslt_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmslt.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmslt_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmslt.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmslt_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsleu_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmsleu_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsleu.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsleu_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmsleu.vi v10, v8, 5
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsleu_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmsleu.vi v10, v8, 5
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
  ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmsleu_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsleu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmsleu.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta,
ma -; NOVLOPT-NEXT: vmand.mm v0, v10, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsleu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmsleu.vx v10, v8, a0 -; VLOPT-NEXT: vmand.mm v0, v10, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsleu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmsleu.vx v10, v8, a0 +; CHECK-NEXT: vmand.mm v0, v10, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsleu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) { -; NOVLOPT-LABEL: vmsleu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsleu.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v12, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsleu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmsleu.vv v12, v8, v10 -; VLOPT-NEXT: vmand.mm v0, v12, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsleu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmsleu.vv v12, v8, v10 +; CHECK-NEXT: vmand.mm v0, v12, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsle_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmsle_vi: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsle.vi v10, v8, 5 -; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v10, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsle_vi: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmsle.vi v10, v8, 5 -; VLOPT-NEXT: vmand.mm v0, v10, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsle_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmsle.vi v10, v8, 5 +; CHECK-NEXT: vmand.mm v0, v10, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsle_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) { -; NOVLOPT-LABEL: vmsle_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsle.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v10, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsle_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmsle.vx v10, v8, a0 -; VLOPT-NEXT: vmand.mm v0, v10, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsle_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmsle.vx v10, v8, a0 +; CHECK-NEXT: vmand.mm v0, v10, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret 
<vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsle_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, <vscale x 4 x i32> %c, iXLen %vl) { -; NOVLOPT-LABEL: vmsle_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsle.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v12, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsle_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmsle.vv v12, v8, v10 -; VLOPT-NEXT: vmand.mm v0, v12, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsle_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmsle.vv v12, v8, v10 +; CHECK-NEXT: vmand.mm v0, v12, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsgtu_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmsgtu_vi: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsgtu.vi v10, v8, 5 -; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v10, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsgtu_vi: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmsgtu.vi v10, v8, 5 -; VLOPT-NEXT: vmand.mm v0, v10, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsgtu_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmsgtu.vi v10, v8, 5 +; CHECK-NEXT: vmand.mm v0, v10, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsgtu_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) { -; NOVLOPT-LABEL: vmsgtu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsgtu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v10, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsgtu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmsgtu.vx v10, v8, a0 -; VLOPT-NEXT: vmand.mm v0, v10, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsgtu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmsgtu.vx v10, v8, a0 +; CHECK-NEXT: vmand.mm v0, v10, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsgt_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmsgt_vi: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsgt.vi v10, v8, 5 -; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v10, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsgt_vi: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmsgt.vi v10, v8, 5 -; VLOPT-NEXT: vmand.mm v0, v10, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsgt_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 
+; CHECK-NEXT: vmsgt.vi v10, v8, 5 +; CHECK-NEXT: vmand.mm v0, v10, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i1> @vmsgt_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b, i32 %c, iXLen %vl) { -; NOVLOPT-LABEL: vmsgt_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmsgt.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v10, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmsgt_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmsgt.vx v10, v8, a0 -; VLOPT-NEXT: vmand.mm v0, v10, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmsgt_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmsgt.vx v10, v8, a0 +; CHECK-NEXT: vmand.mm v0, v10, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i32> @vminu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vminu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vminu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vminu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vminu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vminu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vminu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vminu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vminu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vminu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vminu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vminu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vminu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vminu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vminu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmin_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmin_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmin.vv v8, v8, v10 -; 
NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmin_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmin.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmin_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmin.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmin_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmin_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmin.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmin_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmin.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmin_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmin.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmin.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmaxu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmaxu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmaxu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmaxu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmaxu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmaxu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmaxu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmaxu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmaxu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmaxu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmaxu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmaxu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmaxu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmaxu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> 
@llvm.riscv.vmaxu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmax_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmax_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmax.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmax_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmax.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmax_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmax.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmax_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmax_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmax.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmax_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmax.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmax_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmax.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmax.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmul_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmul.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmul_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmul.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmul_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmul_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmul.vx v10, v8, a0 -; NOVLOPT-NEXT: 
vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmul_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmul.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmul_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmul.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmulh_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmulh_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmulh.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmulh_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmulh.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmulh_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmulh.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmulh.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmulh_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmulh_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmulh.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmulh_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmulh.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmulh_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmulh.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmulh.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmulhu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmulhu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmulhu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmulhu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmulhu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmulhu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmulhu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> 
@llvm.riscv.vmulhu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmulhu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmulhu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmulhu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmulhu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmulhu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmulhu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmulhu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmulhsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmulhsu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmulhsu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmulhsu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmulhsu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmulhsu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmulhsu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmulhsu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmulhsu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmulhsu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmulhsu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmulhsu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmulhsu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmulhsu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmulhsu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmulhsu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vdivu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vdivu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, 
m2, ta, ma -; NOVLOPT-NEXT: vdivu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vdivu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vdivu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vdivu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vdivu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vdivu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vdivu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vdivu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vdivu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vdivu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vdivu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vdivu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vdivu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vdivu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vdiv_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vdiv_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vdiv.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vdiv_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vdiv.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vdiv_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vdiv.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vdiv_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vdiv_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vdiv.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vdiv_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vdiv.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vdiv_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vdiv.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; 
CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vdiv.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vremu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vremu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vremu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vremu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vremu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vremu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vremu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vremu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vremu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vremu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vremu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vremu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vremu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vremu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vremu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vremu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vrem_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vrem_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vrem.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vrem_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vrem.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vrem_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrem.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vrem.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vrem_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vrem_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, 
ta, ma -; NOVLOPT-NEXT: vrem.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vrem_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vrem.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vrem_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vrem.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vrem.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i64> @vwmul_vv(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, iXLen %vl) { -; NOVLOPT-LABEL: vwmul_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; NOVLOPT-NEXT: vwmul.vv v12, v8, v9 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vwmul.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vwmul_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; VLOPT-NEXT: vwmul.vv v12, v8, v9 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vwmul.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vwmul_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vwmul.vv v12, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vwmul.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b, iXLen -1) %2 = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl) ret <vscale x 4 x i64> %2 } define <vscale x 4 x i64> @vwmul_vx(<vscale x 4 x i16> %a, i16 %b, i32 %c, iXLen %vl) { -; NOVLOPT-LABEL: vwmul_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; NOVLOPT-NEXT: vwmul.vx v12, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a2, e32, m2, ta, ma -; NOVLOPT-NEXT: vwmul.vx v8, v12, a1 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vwmul_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a2, e16, m1, ta, ma -; VLOPT-NEXT: vwmul.vx v12, v8, a0 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vwmul.vx v8, v12, a1 -; VLOPT-NEXT: ret +; CHECK-LABEL: vwmul_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vwmul.vx v12, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vwmul.vx v8, v12, a1 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i32.nxv4i16.i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, i16 %b, iXLen -1) %2 = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i64.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %1, i32 %c, iXLen %vl) ret <vscale x 4 x i64> %2 } define <vscale x 4 x i64> @vwmulsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vwmulsu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vwmulsu.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vwmulsu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; 
VLOPT-NEXT: vwmulsu.vv v12, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vwmulsu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vwmulsu.vv v12, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i64> @llvm.riscv.vwmulsu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl) ret <vscale x 4 x i64> %2 } define <vscale x 4 x i64> @vwmulsu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vwmulsu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vwmulsu.vx v12, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vwmulsu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vwmulsu.vx v12, v8, a0 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vwmulsu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vwmulsu.vx v12, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i64> @llvm.riscv.vwmulsu.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl) ret <vscale x 4 x i64> %2 } define <vscale x 4 x i64> @vwmulu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vwmulu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vwmulu.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vwmulu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vwmulu.vv v12, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vwmulu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vwmulu.vv v12, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i64> @llvm.riscv.vwmulu.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl) ret <vscale x 4 x i64> %2 } define <vscale x 4 x i64> @vwmulu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vwmulu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vwmulu.vx v12, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vwmulu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vwmulu.vx v12, v8, a0 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vadd.vv v8, v12, v12 -; 
VLOPT-NEXT: ret +; CHECK-LABEL: vwmulu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v12, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i64> @llvm.riscv.vwmulu.nxv4i64.nxv4i32.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1) %2 = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> %1, <vscale x 4 x i64> %1, iXLen %vl) ret <vscale x 4 x i64> %2 } define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, iXLen %vl) { -; NOVLOPT-LABEL: vwmacc_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; NOVLOPT-NEXT: vwmacc.vv v8, v10, v11 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vwmacc_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; VLOPT-NEXT: vwmacc.vv v8, v10, v11 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vwmacc_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vwmacc.vv v8, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmacc_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vmacc.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmacc_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vmacc.vv v8, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmacc_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vmacc.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmacc_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vmv2r.v v10, v8 -; NOVLOPT-NEXT: vmacc.vx v10, a0, v8 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmacc_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; VLOPT-NEXT: vmv2r.v v10, v8 -; VLOPT-NEXT: vmacc.vx v10, a0, v8 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: 
vmacc_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vmv2r.v v10, v8 +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vmadd_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vmadd.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmadd_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vmadd.vv v8, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmadd_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vmadd_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vmv2r.v v10, v8 -; NOVLOPT-NEXT: vmadd.vx v10, a0, v8 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmadd_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; VLOPT-NEXT: vmv2r.v v10, v8 -; VLOPT-NEXT: vmadd.vx v10, a0, v8 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmadd_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vmv2r.v v10, v8 +; CHECK-NEXT: vmadd.vx v10, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vnmsac_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vnmsac.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vnmsac_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vnmsac.vv v8, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vnmsac_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli 
zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: vnmsac.vv v8, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnmsac_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
-; NOVLOPT-NEXT: vmv2r.v v10, v8
-; NOVLOPT-NEXT: vnmsac.vx v10, a0, v8
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnmsac_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; VLOPT-NEXT: vmv2r.v v10, v8
-; VLOPT-NEXT: vnmsac.vx v10, a0, v8
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnmsac_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: vmv2r.v v10, v8
+; CHECK-NEXT: vnmsac.vx v10, a0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnmsub_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
-; NOVLOPT-NEXT: vnmsub.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnmsub_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; VLOPT-NEXT: vnmsub.vv v8, v8, v10
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnmsub_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: vnmsub.vv v8, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnmsub_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
-; NOVLOPT-NEXT: vmv2r.v v10, v8
-; NOVLOPT-NEXT: vnmsub.vx v10, a0, v8
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnmsub_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; VLOPT-NEXT: vmv2r.v v10, v8
-; VLOPT-NEXT: vnmsub.vx v10, a0, v8
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnmsub_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: vmv2r.v v10, v8
+; CHECK-NEXT: vnmsub.vx v10, a0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmacc_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmacc_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; VLOPT-NEXT: vwmacc.vx v8, a0, v10
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwmacc_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vwmacc.vx v8, a0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccu.vv v8, v10, v11
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmaccu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccu.vv v8, v10, v11
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwmaccu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vwmaccu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, i32 %e, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
-; NOVLOPT-NEXT: vsetvli zero, a2, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmaccu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwmaccu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwmaccsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccsu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccsu.vv v8, v10, v11
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmaccsu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccsu.vv v8, v10, v11
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwmaccsu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vwmaccsu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwmaccsu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccsu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccsu.vx v8, a0, v10
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmaccsu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccsu.vx v8, a0, v10
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwmaccsu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vwmaccsu.vx v8, a0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwmaccus_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccus_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccus.vx v8, a0, v10
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmaccus_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccus.vx v8, a0, v10
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwmaccus_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vwmaccus.vx v8, a0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsaddu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsaddu.vv v10, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsaddu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsaddu.vv v10, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsaddu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v10, v8, v10
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsaddu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsaddu.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsaddu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vsaddu.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsaddu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsaddu_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vsaddu_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsaddu.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsaddu_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsaddu.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsaddu_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsadd_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsadd.vv v10, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsadd_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsadd.vv v10, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsadd_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v10, v8, v10
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsadd_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsadd.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsadd_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vsadd.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsadd_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsadd_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vsadd_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsadd.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsadd_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsadd.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsadd_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssubu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssubu.vv v10, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssubu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vssubu.vv v10, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssubu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v10, v8, v10
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssubu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssubu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssubu.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssubu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vssubu.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssubu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssubu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssub_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssub.vv v10, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssub_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vssub.vv v10, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssub_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vv v10, v8, v10
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssub(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssub_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssub.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssub_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vssub.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssub_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssub(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsmul_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsmul.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsmul_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsmul.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsmul_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsmul.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vsmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsmul_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsmul.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsmul_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vsmul.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vsmul_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsmul.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssrl_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssrl_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssrl.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssrl_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vssrl.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssrl_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssrl.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssrl_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssrl_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssrl.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssrl_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vssrl.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssrl_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssrl.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssrl_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vssrl_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssrl.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssrl_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vssrl.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssrl_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssrl.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssra_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssra_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssra.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssra_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vssra.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssra_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssra.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssra_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vssra_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssra.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssra_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vssra.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssra_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssra.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vssra_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vssra_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vssra.vi v10, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vssra_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vssra.vi v10, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vssra_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssra.vi v10, v8, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnclipu_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnclipu_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vnclipu.wv v14, v8, v12
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v14, v14
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnclipu_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vnclipu.wv v14, v8, v12
-; VLOPT-NEXT: vadd.vv v8, v14, v14
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnclipu_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wv v14, v8, v12
+; CHECK-NEXT: vadd.vv v8, v14, v14
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnclipu_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnclipu_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vnclipu.wx v12, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnclipu_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vnclipu.wx v12, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnclipu_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wx v12, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnclipu_vi(<vscale x 4 x i64> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vnclipu_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vnclipu.wi v12, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnclipu_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vnclipu.wi v12, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnclipu_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wi v12, v8, 5
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnclip_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnclip_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vnclip.wv v14, v8, v12
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v14, v14
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnclip_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vnclip.wv v14, v8, v12
-; VLOPT-NEXT: vadd.vv v8, v14, v14
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnclip_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wv v14, v8, v12
+; CHECK-NEXT: vadd.vv v8, v14, v14
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnclip_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vnclip_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vnclip.wx v12, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnclip_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vnclip.wx v12, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnclip_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wx v12, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vnclip_vi(<vscale x 4 x i64> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vnclip_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vnclip.wi v12, v8, 5
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vnclip_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vnclip.wi v12, v8, 5
-; VLOPT-NEXT: vadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vnclip_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 5
+; CHECK-NEXT: vadd.vv v8, v12, v12
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vmv_v_i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
-; NOVLOPT-LABEL: vmv_v_i:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmv.v.i v10, 5
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmv_v_i:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmv.v.i v10, 5
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmv_v_i:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vmv.v.x.nxv4i32(<vscale x 4 x i32> poison, i32 5, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
-; NOVLOPT-LABEL: vmv_v_x:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmv.v.x v10, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmv_v_x:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vmv.v.x v10, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmv_v_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vmv.v.x.nxv4i32(<vscale x 4 x i32> poison, i32 %x, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
  ret <vscale x 4 x i32> %2
@@ -3161,110 +1909,67 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
; The vmv.v.v is optimized away if we use a vadd as the user.
define <vscale x 1 x i8> @vmv_v_v(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl) {
-; NOVLOPT-LABEL: vmv_v_v:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
-; NOVLOPT-NEXT: vmv.v.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmerge.vvm v8, v8, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmv_v_v:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
-; VLOPT-NEXT: vmv.v.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; VLOPT-NEXT: vmerge.vvm v8, v8, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmv_v_v:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
  %2 = call <vscale x 1 x i8> @llvm.riscv.vmv.v.v.nxv1i8.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, iXLen -1)
  %3 = call <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %2, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl)
  ret <vscale x 1 x i8> %3
}

define <vscale x 4 x i32> @vwsll_vi(<vscale x 4 x i16> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vwsll_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vwsll.vi v10, v8, 1
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsll_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT: vwsll.vi v10, v8, 1
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsll_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vwsll.vi v10, v8, 1
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, iXLen 1, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwsll_vx(<vscale x 4 x i16> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsll_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vwsll.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsll_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT: vwsll.vx v10, v8, a0
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsll_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vwsll.vx v10, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, iXLen %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vwsll_vv(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vwsll_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vwsll.vv v10, v8, v9
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwsll_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT: vwsll.vv v10, v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vwsll_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vwsll.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 1 x i32> @vmand_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmand_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmand_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmand.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmand_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmand.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3272,26 +1977,15 @@ define <vscale x 1 x i32> @vmand_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b,
}

define <vscale x 1 x i32> @vmnand_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmnand_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmnand.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmnand_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmnand.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmnand_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmnand.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3299,26 +1993,15 @@ define <vscale x 1 x i32> @vmnand_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b,
}

define <vscale x 1 x i32> @vmandn_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmandn_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmandn.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmandn_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmandn.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmandn_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmandn.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3326,26 +2009,15 @@ define <vscale x 1 x i32> @vmandn_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b,
}

define <vscale x 1 x i32> @vmxor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmxor_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmxor.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmxor_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmxor.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmxor_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmxor.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3353,26 +2025,15 @@ define <vscale x 1 x i32> @vmxor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b,
}

define <vscale x 1 x i32> @vmor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmor_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmor.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmor_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmor.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmor_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmor.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3381,26 +2042,15 @@ define <vscale x 1 x i32> @vmor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <
define <vscale x 1 x i32> @vmnor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmnor_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmnor.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmnor_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmnor.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmnor_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmnor.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3408,26 +2058,15 @@ define <vscale x 1 x i32> @vmnor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b,
}

define <vscale x 1 x i32> @vmorn_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmorn_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmorn.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmorn_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmorn.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmorn_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmorn.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3435,26 +2074,15 @@ define <vscale x 1 x i32> @vmorn_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b,
}

define <vscale x 1 x i32> @vmxnor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmxnor_mm:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmxnor.mm v8, v0, v8
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v8
-; NOVLOPT-NEXT: vmv1r.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmxnor_mm:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmxnor.mm v8, v0, v8
-; VLOPT-NEXT: vmand.mm v0, v0, v8
-; VLOPT-NEXT: vmv1r.v v8, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmxnor_mm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmxnor.mm v8, v0, v8
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3462,24 +2090,14 @@ define <vscale x 1 x i32> @vmxnor_mm(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b,
}

define <vscale x 1 x i32> @vmsbf_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsbf_m:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmsbf.m v9, v0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsbf_m:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmsbf.m v9, v0
-; VLOPT-NEXT: vmand.mm v0, v0, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsbf_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmsbf.m v9, v0
+; CHECK-NEXT: vmand.mm v0, v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmsbf.nxv1i1(<vscale x 1 x i1> %a, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3487,24 +2105,14 @@ define <vscale x 1 x i32> @vmsbf_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c,
}

define <vscale x 1 x i32> @vmsif_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsif_m:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmsif.m v9, v0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsif_m:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmsif.m v9, v0
-; VLOPT-NEXT: vmand.mm v0, v0, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsif_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmsif.m v9, v0
+; CHECK-NEXT: vmand.mm v0, v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(<vscale x 1 x i1> %a, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3512,24 +2120,14 @@ define <vscale x 1 x i32> @vmsif_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c,
}

define <vscale x 1 x i32> @vmsof_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmsof_m:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmsof.m v9, v0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v0, v9
-; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmsof_m:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; VLOPT-NEXT: vmsof.m v9, v0
-; VLOPT-NEXT: vmand.mm v0, v0, v9
-; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
-; VLOPT-NEXT: vadd.vv v8, v8, v8, v0.t
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmsof_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmsof.m v9, v0
+; CHECK-NEXT: vmand.mm v0, v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t
+; CHECK-NEXT: ret
  %1 = call <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(<vscale x 1 x i1> %a, iXLen -1)
  %2 = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %1, iXLen %vl)
  %3 = call <vscale x 1 x i32> @llvm.riscv.vadd.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i32> %c, <vscale x 1 x i1> %2, iXLen %vl, iXLen 0)
@@ -3537,160 +2135,96 @@ define <vscale x 1 x i32> @vmsof_m(<vscale x 1 x i1> %a, <vscale x 1 x i32> %c,
}

define <vscale x 4 x i32> @viota_m(<vscale x 4 x i1> %a, <vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: viota_m:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: viota.m v10, v0
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: viota_m:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: viota.m v10, v0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: viota_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: viota.m v10, v0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.viota.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %a, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %c, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vid.v(<vscale x 4 x i32> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vid.v:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vid.v v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vid.v:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vid.v v10
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vid.v:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vid.nxv4i32(<vscale x 4 x i32> poison, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %c, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vslideup_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vslideup_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vslideup.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vslideup_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vslideup.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vslideup_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vslideup(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1, iXLen 3)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vslideup_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vslideup_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vslideup.vi v10, v8, 2
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vslideup_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vslideup.vi v10, v8, 2
-; VLOPT-NEXT: vadd.vv v8, v10, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vslideup_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v10, v8, 2
+; CHECK-NEXT: vadd.vv v8, v10, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vslideup(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen -1, iXLen 3)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vslidedown_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vslidedown_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vslidedown.vx v8, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vslidedown_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vslidedown.vx v8, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vslidedown_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vslidedown(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1, iXLen 3)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vslidedown_vi(<vscale x 4 x i32> %a, iXLen %vl) {
-; NOVLOPT-LABEL: vslidedown_vi:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vslidedown.vi v8, v8, 2
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vslidedown_vi:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vslidedown.vi v8, v8, 2
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vslidedown_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vslidedown(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen -1, iXLen 3)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @vslide1up_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vslide1up_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vslide1up.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vslide1up_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vslide1up.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vslide1up_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vslide1up.vx v10, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vslide1up(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x float> @vfslide1up_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
-; NOVLOPT-LABEL: vfslide1up_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfslide1up.vf v10, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfadd.vv v8, v10, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vfslide1up_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vfslide1up.vf v10, v8, fa0
-; VLOPT-NEXT: vfadd.vv v8, v10, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vfslide1up_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfslide1up.vf v10, v8, fa0
+; CHECK-NEXT: vfadd.vv v8, v10, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x float> @llvm.riscv.vfslide1up(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen -1)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %1, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
@@ -3699,21 +2233,13 @@ define <vscale x 4 x float> @vfslide1up_vf(<vscale x 4 x float> %a, float %b, iX
; Negative test – not safe to reduce vl
define <vscale x 4 x i32> @vslide1down_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
-; NOVLOPT-LABEL: vslide1down_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vslide1down.vx v8, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vslide1down_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vslide1down.vx v8, v8, a0
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vslide1down_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vslide1down(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
  ret <vscale x 4 x i32> %2
@@ -3722,1911 +2248,1152 @@ define <vscale x 4 x i32> @vslide1down_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen
; Negative test – not safe to reduce vl
define <vscale x 4 x float> @vfslide1down_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
-; NOVLOPT-LABEL: vfslide1down_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfslide1down.vf v8, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vfslide1down_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vfslide1down.vf v8, v8, fa0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vfadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vfslide1down_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x float> @llvm.riscv.vfslide1down(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen -1)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %1, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}

define <vscale x 4 x float> @vfadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vfadd_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vfadd_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vfadd.vv v8, v8, v10
-; VLOPT-NEXT: vfadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vfadd_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v10
+; CHECK-NEXT: vfadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}

define <vscale x 4 x float> @vfadd_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
-; NOVLOPT-LABEL: vfadd_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfadd.vf v10, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vfadd_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vfadd.vf v10, v8, fa0
-; VLOPT-NEXT: vfadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vfadd_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vf v10, v8, fa0
+; CHECK-NEXT: vfadd.vv v8, v10, v8
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}

define <vscale x 4 x float> @vfsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vfsub_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfsub.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vfsub_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vfsub.vv v8, v8, v10
-; VLOPT-NEXT: vfadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vfsub_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v10
+; CHECK-NEXT: vfadd.vv v8, v8, v10
+; CHECK-NEXT: ret
  %1 = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}

define <vscale x 4 x float> @vfsub_vf(<vscale x 4 x float> %a, 
float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfsub_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfsub.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsub_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfsub.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsub_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfrsub_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfrsub_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfrsub.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfrsub_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfrsub.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfrsub_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfrsub.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x double> @vfwadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwadd_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwadd.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwadd_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwadd.vv v12, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwadd_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwadd.vv v12, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwadd_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwadd_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwadd.vf v12, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; 
NOVLOPT-NEXT: vfadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwadd_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwadd.vf v12, v8, fa0 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwadd_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwadd.vf v12, v8, fa0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwsub_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwsub.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwsub_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwsub.vv v12, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwsub_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwsub.vv v12, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwsub_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwsub_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwsub.vf v12, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwsub_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwsub.vf v12, v8, fa0 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwsub_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwsub.vf v12, v8, fa0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwadd_wv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwadd_wv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwadd.wv v8, v8, v12 -; NOVLOPT-NEXT: vsetvli 
zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwadd_wv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwadd.wv v8, v8, v12 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwadd_wv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwadd_wf(<vscale x 4 x double> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwadd_wf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwadd.wf v8, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwadd_wf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwadd.wf v8, v8, fa0 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwadd_wf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwsub_wv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwsub_wv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwsub.wv v8, v8, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwsub_wv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwsub.wv v8, v8, v12 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwsub_wv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwsub.wv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwsub_wf(<vscale x 4 x double> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwsub_wf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwsub.wf v8, v8, fa0 -; 
NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwsub_wf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwsub.wf v8, v8, fa0 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwsub_wf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x float> @vfmul_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfmul_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmul.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmul_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmul.vv v8, v8, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmul_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmul_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfmul_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmul.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmul_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmul.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmul_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmul.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfdiv_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfdiv_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfdiv.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfdiv_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfdiv.vv v8, v8, v10 
-; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfdiv_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v8, v8, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfdiv_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfdiv_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfdiv.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfdiv_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfdiv.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfdiv_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfrdiv_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfrdiv_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfrdiv.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfrdiv_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfrdiv.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfrdiv_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfrdiv.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x double> @vfwmul_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfwmul_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwmul.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwmul_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfwmul.vv v12, v8, v10 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v12, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwmul_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwmul.vv v12, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v12, v12 +; CHECK-NEXT: ret %1 = 
call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
ret <vscale x 4 x double> %2
}

define <vscale x 4 x double> @vfwmul_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
-; NOVLOPT-LABEL: vfwmul_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfwmul.vf v12, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; NOVLOPT-NEXT: vfadd.vv v8, v12, v12
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vfwmul_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vfwmul.vf v12, v8, fa0
-; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; VLOPT-NEXT: vfadd.vv v8, v12, v12
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vfwmul_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwmul.vf v12, v8, fa0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v12, v12
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
ret <vscale x 4 x double> %2
}

define <vscale x 4 x i1> @vmfeq_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfeq_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmfeq.vf v10, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmfeq_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmfeq.vf v10, v8, fa0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmfeq_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmfeq_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfeq_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmfeq.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmfeq_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmfeq.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmfeq_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}
define <vscale x 4 x i1> @vmfne_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfne_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmfne.vf v10, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmfne_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmfne.vf v10, v8, fa0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmfne_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfne.vf v10, v8, fa0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmfne_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfne_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmfne.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmfne_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmfne.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmfne_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfne.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmflt_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmflt_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmflt.vf v10, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmflt_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmflt.vf v10, v8, fa0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmflt_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmflt.vf v10, v8, fa0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmflt_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmflt_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmflt.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmflt_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmflt.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmflt_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmflt.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmfle_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfle_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmfle.vf v10, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmfle_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmfle.vf v10, v8, fa0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmfle_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfle.vf v10, v8, fa0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmfle_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfle_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmfle.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v12, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmfle_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmfle.vv v12, v8, v10
-; VLOPT-NEXT: vmand.mm v0, v12, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmfle_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfle.vv v12, v8, v10
+; CHECK-NEXT: vmand.mm v0, v12, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmfgt_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfgt_vf:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmfgt.vf v10, v8, fa0
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT: vmand.mm v0, v10, v0
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmfgt_vf:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmfgt.vf v10, v8, fa0
-; VLOPT-NEXT: vmand.mm v0, v10, v0
-; VLOPT-NEXT: ret
+; CHECK-LABEL: vmfgt_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0
+; CHECK-NEXT: vmand.mm v0, v10, v0
+; CHECK-NEXT: ret
%1 = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
%2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
ret <vscale x 4 x i1> %2
}

define <vscale x 4 x i1> @vmfgt_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vmfgt_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmflt.vv v12, v10, v8
-; NOVLOPT-NEXT:
vsetvli zero, a0, e8, mf2, ta, ma -; NOVLOPT-NEXT: vmand.mm v0, v12, v0 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmfgt_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmflt.vv v12, v10, v8 -; VLOPT-NEXT: vmand.mm v0, v12, v0 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmfgt_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmflt.vv v12, v10, v8 +; CHECK-NEXT: vmand.mm v0, v12, v0 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1) %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl) ret <vscale x 4 x i1> %2 } define <vscale x 4 x i32> @vmerge_vvm(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen %vl) { -; NOVLOPT-LABEL: vmerge_vvm: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmerge.vvm v8, v8, v10, v0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmerge_vvm: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmerge.vvm v8, v8, v10, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmerge_vvm: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmerge.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmerge_vxm(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen %vl) { -; NOVLOPT-LABEL: vmerge_vxm: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmerge.vxm v8, v8, a0, v0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmerge_vxm: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vmerge.vxm v8, v8, a0, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmerge_vxm: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmerge.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vmerge_vim(<vscale x 4 x i32> %a, <vscale x 4 x i1> %c, iXLen %vl) { -; NOVLOPT-LABEL: vmerge_vim: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vmerge.vim v8, v8, 9, v0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vmerge_vim: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vmerge.vim v8, v8, 9, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vmerge_vim: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vim 
v8, v8, 9, v0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vmerge.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 9, <vscale x 4 x i1> %c, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vadc_vvm(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen %vl) { -; NOVLOPT-LABEL: vadc_vvm: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vadc.vvm v8, v8, v10, v0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vadc_vvm: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vadc.vvm v8, v8, v10, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vadc_vvm: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vadc.vvm v8, v8, v10, v0 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vadc.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vadc_vxm(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen %vl) { -; NOVLOPT-LABEL: vadc_vxm: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vadc.vxm v8, v8, a0, v0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vadc_vxm: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vadc.vxm v8, v8, a0, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vadc_vxm: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vadc.vxm v8, v8, a0, v0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vadc.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vadc_vim(<vscale x 4 x i32> %a, <vscale x 4 x i1> %c, iXLen %vl) { -; NOVLOPT-LABEL: vadc_vim: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vadc.vim v8, v8, 9, v0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vadc_vim: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vadc.vim v8, v8, 9, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vadc_vim: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vadc.vim v8, v8, 9, v0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vadc.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 9, <vscale x 4 x i1> %c, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl) ret <vscale x 4 x 
i32> %2 } define <vscale x 4 x i32> @vaadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vaadd_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vaadd.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vaadd_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vaadd.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vaadd_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vaadd.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vaadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vaadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vaadd_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vaadd.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vaadd_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vaadd.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vaadd_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vaadd.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vaadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vasub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vasub_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vasub.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vasub_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vasub.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vasub_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vasub.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vasub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vasub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vasub_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: 
vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vasub.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vasub_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vasub.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vasub_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vasub.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vasub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vaaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vaaddu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vaaddu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vaaddu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vaaddu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vaaddu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vaaddu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vaaddu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vaaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vaaddu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vaaddu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vaaddu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vaaddu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vaaddu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vaaddu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vaaddu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vasubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vasubu_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vasubu.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; 
VLOPT-LABEL: vasubu_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vasubu.vv v8, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vasubu_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vasubu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vasubu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vasubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) { -; NOVLOPT-LABEL: vasubu_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vasubu.vx v10, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vasubu_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vasubu.vx v10, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vasubu_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vasubu.vx v10, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vasubu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x float> @vfmax_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfmax_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmax.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmax_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmax.vv v8, v8, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmax_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmax_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfmax_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmax.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmax_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmax.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmax_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 
+; CHECK-NEXT: vfmax.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmin_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfmin_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmin.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmin_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmin.vv v8, v8, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmin_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmin_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfmin_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmin.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmin_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmin.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmin_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmin.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfsgnj_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfsgnj_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfsgnj.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsgnj_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfsgnj.vv v8, v8, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsgnj_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> 
%b, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfsgnj_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfsgnj_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfsgnj.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsgnj_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfsgnj.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsgnj_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsgnj.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfsgnjn_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfsgnjn_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfsgnjn.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsgnjn_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfsgnjn.vv v8, v8, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsgnjn_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfsgnjn_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfsgnjn_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfsgnjn.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsgnjn_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfsgnjn.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsgnjn_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsgnjn.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfsgnjx_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfsgnjx_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfsgnjx.vv v8, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; 
NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsgnjx_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfsgnjx.vv v8, v8, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsgnjx_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfsgnjx_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfsgnjx_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfsgnjx.vf v10, v8, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsgnjx_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfsgnjx.vf v10, v8, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsgnjx_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsgnjx.vf v10, v8, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmerge_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x i1> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmerge_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmerge.vfm v10, v8, fa0, v0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmerge_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmerge.vfm v10, v8, fa0, v0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmerge_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmerge.vfm v10, v8, fa0, v0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmerge(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, <vscale x 4 x i1> %c, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmv_v_f(<vscale x 4 x float> %a, float %b, iXLen %vl) { -; NOVLOPT-LABEL: vfmv_v_f: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmv.v.f v10, fa0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmv_v_f: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmv.v.f v10, fa0 -; VLOPT-NEXT: vfadd.vv v8, v10, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmv_v_f: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta,
ma +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vfadd.vv v8, v10, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmv.v.f(<vscale x 4 x float> poison, float %b, iXLen -1) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmacc_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmacc.vv v8, v12, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmacc_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmacc.vv v8, v12, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmacc_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmacc.vv v8, v12, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmacc_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmacc.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmacc_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmacc.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmacc_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmacc_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmacc.vv v8, v12, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmacc_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmacc.vv v8, v12, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmacc_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmacc.vv v8, v12, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, 
<vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmacc_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmacc.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmacc_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmacc.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmacc_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmsac_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmsac.vv v8, v12, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmsac_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmsac.vv v8, v12, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmsac_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmsac.vv v8, v12, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmsac_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmsac.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmsac_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmsac.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmsac_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmsac_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmsac.vv v8, v12, 
v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmsac_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmsac.vv v8, v12, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmsac_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmsac.vv v8, v12, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmsac_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmsac.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmsac_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmsac.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmsac_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmadd_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmadd.vv v8, v10, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmadd_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmadd.vv v8, v10, v12 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmadd_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v10, v12 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmadd_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmadd.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmadd_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmadd.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: 
vfmadd_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmadd_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmadd.vv v8, v10, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmadd_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmadd.vv v8, v10, v12 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmadd_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v10, v12 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmadd_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmadd.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmadd_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmadd.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmadd_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmsub_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmsub.vv v8, v10, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmsub_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmsub.vv v8, v10, v12 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmsub_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v10, v12 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, 
iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfmsub_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfmsub.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfmsub_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfmsub.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfmsub_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmsub_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmsub.vv v8, v10, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmsub_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmsub.vv v8, v10, v12 -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmsub_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v10, v12 +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) { -; NOVLOPT-LABEL: vfnmsub_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfnmsub.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfnmsub_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfnmsub.vf v8, fa0, v10 -; VLOPT-NEXT: vfadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfnmsub_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x double> @vfwmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, 
iXLen %vl) { -; NOVLOPT-LABEL: vfwmacc_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwmacc_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwmacc_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwmacc.vv v8, v12, v14 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwmacc_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwmacc.vf v8, fa0, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwmacc_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwmacc.vf v8, fa0, v12 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwmacc_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwnmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwnmacc_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwnmacc.vv v8, v12, v14 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwnmacc_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwnmacc.vv v8, v12, v14 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwnmacc_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwnmacc.vv v8, v12, v14 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x 
double> @vfwnmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwnmacc_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwnmacc.vf v8, fa0, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwnmacc_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwnmacc.vf v8, fa0, v12 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwnmacc_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwmsac_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwmsac.vv v8, v12, v14 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwmsac_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwmsac.vv v8, v12, v14 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwmsac_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwmsac.vv v8, v12, v14 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwmsac_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwmsac.vf v8, fa0, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwmsac_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwmsac.vf v8, fa0, v12 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwmsac_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, 
<vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwnmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwnmsac_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwnmsac.vv v8, v12, v14 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwnmsac_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwnmsac.vv v8, v12, v14 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwnmsac_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwnmsac.vv v8, v12, v14 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfwnmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwnmsac_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; NOVLOPT-NEXT: vfwnmsac.vf v8, fa0, v12 -; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v16 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwnmsac_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; VLOPT-NEXT: vfwnmsac.vf v8, fa0, v12 -; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v16 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwnmsac_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: ret %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x double> %2 } define <vscale x 4 x float> @vfwmaccbf16_vv(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwmaccbf16_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; NOVLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwmaccbf16_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; VLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwmaccbf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vfwmaccbf16.vv v8, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, <vscale x 4 x bfloat> 
%b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x i32> @vsbc_vvm(<vscale x 4 x i32> %a, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, iXLen %vl) { -; NOVLOPT-LABEL: vsbc_vvm: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vsbc.vvm v8, v8, v10, v0 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vsbc_vvm: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vsbc.vvm v8, v8, v10, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vsbc_vvm: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsbc.vvm v8, v8, v10, v0 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vsbc.nxv4i32.nxv4i32.nxv4i1(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %mask, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %c, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vsbc_vxm(<vscale x 4 x i32> %a, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %b, i32 %c, iXLen %vl) { -; NOVLOPT-LABEL: vsbc_vxm: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vsbc.vxm v8, v8, a0, v0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vsbc_vxm: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vsbc.vxm v8, v8, a0, v0 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vsbc_vxm: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsbc.vxm v8, v8, a0, v0 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vsbc.nxv4i32.i32.nxv4i1(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %c, <vscale x 4 x i1> %mask, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vfclass_v(<vscale x 4 x float> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vfclass_v: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfclass.v v8, v8 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfclass_v: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vfclass.v v8, v8 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfclass_v: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vfclass.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x float> %a, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vrgather_vi(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) { -; 
NOVLOPT-LABEL: vrgather_vi: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vrgather.vi v12, v8, 5 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vrgather_vi: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vrgather.vi v12, v8, 5 -; VLOPT-NEXT: vadd.vv v8, v12, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vrgather_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrgather.vi v12, v8, 5 +; CHECK-NEXT: vadd.vv v8, v12, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vrgather.vx.nxv4i32.iXLen(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vrgather_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %idx, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vrgather_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vrgather.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vrgather_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vrgather.vv v12, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v12, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vrgather_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrgather.vv v12, v8, v10 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vrgather.vv.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %idx, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vrgather_vx(<vscale x 4 x i32> %a, iXLen %idx, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vrgather_vx: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vrgather.vx v12, v8, a0 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vrgather_vx: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vrgather.vx v12, v8, a0 -; VLOPT-NEXT: vadd.vv v8, v12, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vrgather_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vrgather.vx v12, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v10 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vrgather.vx.nxv4i32.iXLen(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %idx, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x i32> @vrgatherei16_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %idx, <vscale x 4 x i32> %b, iXLen %vl) { -; NOVLOPT-LABEL: vrgatherei16_vv: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vrgatherei16.vv v12, v8, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v12, v8 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vrgatherei16_vv: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli 
zero, a0, e32, m2, ta, ma -; VLOPT-NEXT: vrgatherei16.vv v12, v8, v10 -; VLOPT-NEXT: vadd.vv v8, v12, v8 -; VLOPT-NEXT: ret +; CHECK-LABEL: vrgatherei16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v8, v10 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.riscv.vrgatherei16.vv.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i16> %idx, iXLen -1) %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl) ret <vscale x 4 x i32> %2 } define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) { -; NOVLOPT-LABEL: vfwmaccbf16_vf: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma -; NOVLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10 -; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfwmaccbf16_vf: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; VLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10 -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfwmaccbf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0) %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) { -; NOVLOPT-LABEL: vfsqrt: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma -; NOVLOPT-NEXT: vmv2r.v v12, v8 -; NOVLOPT-NEXT: fsrmi a0, 0 -; NOVLOPT-NEXT: vfsqrt.v v14, v8 -; NOVLOPT-NEXT: fsrm a0 -; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfsqrt: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; VLOPT-NEXT: vmv2r.v v12, v8 -; VLOPT-NEXT: fsrmi a0, 0 -; VLOPT-NEXT: vfsqrt.v v14, v8 -; VLOPT-NEXT: fsrm a0 -; VLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfsqrt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfsqrt.v v14, v8 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfwmacc.vv v8, v12, v14 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7) %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) { -; NOVLOPT-LABEL: vfrsqrt7: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma -; NOVLOPT-NEXT: vmv2r.v v12, v8 -; NOVLOPT-NEXT: vfrsqrt7.v v14, v8 -; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfrsqrt7: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetivli zero, 6, e32, 
m2, ta, ma -; VLOPT-NEXT: vmv2r.v v12, v8 -; VLOPT-NEXT: vfrsqrt7.v v14, v8 -; VLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfrsqrt7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: vfrsqrt7.v v14, v8 +; CHECK-NEXT: vfwmacc.vv v8, v12, v14 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7) %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0) ret <vscale x 4 x double> %2 } define <vscale x 4 x double> @vfrec7(<vscale x 4 x float> %a) { -; NOVLOPT-LABEL: vfrec7: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma -; NOVLOPT-NEXT: vmv2r.v v12, v8 -; NOVLOPT-NEXT: fsrmi a0, 0 -; NOVLOPT-NEXT: vfrec7.v v14, v8 -; NOVLOPT-NEXT: fsrm a0 -; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vfrec7: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; VLOPT-NEXT: vmv2r.v v12, v8 -; VLOPT-NEXT: fsrmi a0, 0 -; VLOPT-NEXT: vfrec7.v v14, v8 -; VLOPT-NEXT: fsrm a0 -; VLOPT-NEXT: vfwmacc.vv v8, v12, v14 -; VLOPT-NEXT: ret +; CHECK-LABEL: vfrec7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfrec7.v v14, v8 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfwmacc.vv v8, v12, v14 +; CHECK-NEXT: ret %1 = call <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7) %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0) ret <vscale x 4 x double> %2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll index 8507254..e1f641a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll @@ -1,12 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: | FileCheck %s -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -riscv-enable-vl-optimizer \ -; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -riscv-enable-vl-optimizer \ -; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen) declare <vscale x 4 x i32> @llvm.riscv.vrgather.vv.nxv4i32.iXLen( diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll index 938f575..545fcc9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll @@ -1,12 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: sed 's/iXLen/i32/g' %s | llc 
-mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs \ -; RUN: -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs \ -; RUN: -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \ -; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \ -; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) { ; CHECK-LABEL: vdot_lane_s32: @@ -40,20 +34,12 @@ declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16( iXLen); define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind { -; NOVLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16: -; NOVLOPT: # %bb.0: # %entry -; NOVLOPT-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; NOVLOPT-NEXT: vwadd.vv v10, v8, v9 -; NOVLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; NOVLOPT-NEXT: vnsrl.wv v8, v10, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16: -; VLOPT: # %bb.0: # %entry -; VLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; VLOPT-NEXT: vwadd.vv v10, v8, v9 -; VLOPT-NEXT: vnsrl.wv v8, v10, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vwadd.vv v10, v8, v9 +; CHECK-NEXT: vnsrl.wv v8, v10, v12 +; CHECK-NEXT: ret entry: %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32> %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32> @@ -74,22 +60,13 @@ declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16( iXLen, iXLen); define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind { -; NOVLOPT-LABEL: vnclip: -; NOVLOPT: # %bb.0: # %entry -; NOVLOPT-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; NOVLOPT-NEXT: vwadd.vv v10, v8, v9 -; NOVLOPT-NEXT: csrwi vxrm, 0 -; NOVLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; NOVLOPT-NEXT: vnclip.wv v8, v10, v12 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vnclip: -; VLOPT: # %bb.0: # %entry -; VLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; VLOPT-NEXT: vwadd.vv v10, v8, v9 -; VLOPT-NEXT: csrwi vxrm, 0 -; VLOPT-NEXT: vnclip.wv v8, v10, v12 -; VLOPT-NEXT: ret +; CHECK-LABEL: vnclip: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vwadd.vv v10, v8, v9 +; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vnclip.wv v8, v10, v12 +; CHECK-NEXT: ret entry: %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32> %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir index 52cd3e3..bfa4067 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir 
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir @@ -8,8 +8,10 @@ body: | ; CHECK-LABEL: name: vop_vi ; CHECK: %x:vr = PseudoVADD_VI_M1 $noreg, $noreg, 9, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VI_M1 $noreg, $noreg, 9, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vop_vi_incompatible_eew @@ -18,8 +20,10 @@ body: | ; CHECK-LABEL: name: vop_vi_incompatible_eew ; CHECK: %x:vr = PseudoVADD_VI_M1 $noreg, $noreg, 9, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VI_M1 $noreg, $noreg, 9, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %y ... --- name: vop_vi_incompatible_emul @@ -28,8 +32,10 @@ body: | ; CHECK-LABEL: name: vop_vi_incompatible_emul ; CHECK: %x:vr = PseudoVADD_VI_M1 $noreg, $noreg, 9, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VI_M1 $noreg, $noreg, 9, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vop_vv @@ -38,8 +44,10 @@ body: | ; CHECK-LABEL: name: vop_vv ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vop_vv_incompatible_eew @@ -48,9 +56,10 @@ body: | ; CHECK-LABEL: name: vop_vv_incompatible_eew ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 - + $v8 = COPY %y ... --- name: vop_vv_incompatible_emul @@ -59,8 +68,10 @@ body: | ; CHECK-LABEL: name: vop_vv_incompatible_emul ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vwop_vv_vd @@ -69,8 +80,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vd ; CHECK: early-clobber %x:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %y ... 
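The hunks above, and the rest of this file, append a $v8 = COPY %y (or $v8m2 = COPY %y) after each def under test. With the VL optimizer now on by default and the NOVLOPT/VLOPT prefixes folded into CHECK, a result with no reader is simply dead code, so the COPY gives the pass a live user from which to compute a demanded VL. A minimal sketch of the pattern being pinned down, mirroring vop_vv above; the test name and the RUN header (in particular the -run-pass=riscv-vl-optimizer spelling) are assumptions, not part of the patch:

# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v \
# RUN:   -run-pass=riscv-vl-optimizer -verify-machineinstrs | FileCheck %s
---
name: shrink_to_consumer_vl
body: |
  bb.0:
    ; %x is computed at VLMAX (-1), but its only user %y runs at VL = 1,
    ; so the optimizer may shrink %x's VL to 1.
    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
    %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
    ; The COPY keeps %y (and hence %x) from being dead.
    $v8 = COPY %y
...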
--- name: vwop_vv_vd_incompatible_eew @@ -79,8 +92,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vd_incompatible_eew ; CHECK: early-clobber %x:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vwop_vv_vd_incompatible_emul @@ -89,8 +104,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vd_incompatible_emul ; CHECK: early-clobber %x:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %y ... --- name: vwop_vv_vd_passthru_use @@ -100,9 +117,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_VV_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVWADD_VV_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %z ... --- name: vwop_vv_vd_passthru_use_incompatible_eew @@ -112,9 +131,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_VV_MF2 %x, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVWADD_VV_MF2 %x, $noreg, $noreg, 1, 4 /* e16 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %z ... --- name: vwop_vv_vd_passthru_use_incompatible_emul @@ -124,9 +145,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_VV_MF4 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVWADD_VV_MF4 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %z ... --- name: vwop_vv_vs2 @@ -135,8 +158,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vs2 ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8m2 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrm2 = PseudoVWADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8m2 = COPY %y ...
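In the vwop_vv_* cases, the sources of a widening add are read at SEW while the destination is written at 2*SEW with doubled EMUL, which is why the compatible cases pair an e8/M1 producer with an e8 PseudoVWADD_VV_M1 whose result needs vrm2, and why the incompatible_eew/incompatible_emul variants must keep VL at VLMAX. A sketch of the compatible source case, mirroring vwop_vv_vs2 above under the same assumed RUN header:

---
name: widen_source_read_at_sew
body: |
  bb.0:
    ; The vs2 source of the widening add is read at SEW = e8, EMUL = 1,
    ; matching the producer, so VLMAX (-1) can drop to the consumer's VL.
    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
    %y:vrm2 = PseudoVWADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
    $v8m2 = COPY %y
...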
--- name: vwop_vv_vs2_incompatible_eew @@ -145,8 +170,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vs2_incompatible_eew ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8m2 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrm2 = PseudoVWADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 + $v8m2 = COPY %y ... --- name: vwop_vv_vs2_incompatible_emul @@ -155,8 +182,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vs2_incompatible_emul ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVWADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vwop_vv_vs1 @@ -165,8 +194,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vs1 ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8m2 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrm2 = PseudoVWADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8m2 = COPY %y ... --- name: vwop_vv_vs1_incompatible_eew @@ -175,8 +206,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vs1_incompatible_eew ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_VV_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8m2 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vrm2 = PseudoVWADD_VV_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0 + $v8m2 = COPY %y ... --- name: vwop_vv_vs1_incompatible_emul @@ -185,8 +218,10 @@ body: | ; CHECK-LABEL: name: vwop_vv_vs1_incompatible_emul ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVWADD_VV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vwop_wv_vd @@ -195,8 +230,10 @@ body: | ; CHECK-LABEL: name: vwop_wv_vd ; CHECK: early-clobber %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %y ... --- name: vwop_wv_vd_incompatible_eew @@ -205,8 +242,10 @@ body: | ; CHECK-LABEL: name: vwop_wv_vd_incompatible_eew ; CHECK: early-clobber %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... 
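The vwop_wv_* cases cover the .wv form, whose first source is already wide: the destination and the wide operand have EEW = 2*SEW, so a consumer reading the result at e16 is compatible with an e8 widening add. A sketch mirroring vwop_wv_vd above, under the same assumptions:

---
name: wv_dest_read_at_twice_sew
body: |
  bb.0:
    ; The e8 .wv add writes an e16 result, so the e16 user is compatible
    ; and the widening add's VL can shrink from VLMAX (-1) to 1.
    ; CHECK: early-clobber %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
    %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
    %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
    $v8 = COPY %y
...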
 ---
 name: vwop_wv_vd_incompatible_emul
@@ -215,8 +254,10 @@ body: |
     ; CHECK-LABEL: name: vwop_wv_vd_incompatible_emul
     ; CHECK: early-clobber %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVWADD_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vwop_wv_vd_passthru_use
@@ -226,9 +267,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_WV_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVWADD_WV_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: vwop_wv_vd_passthru_use_incompatible_eew
@@ -238,9 +281,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_WV_MF2 %x, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVWADD_WV_MF2 %x, $noreg, $noreg, 1, 4 /* e16 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: vwop_wv_vd_passthru_use_incompatible_emul
@@ -250,9 +295,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_WV_MF4 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVWADD_WV_MF4 %x, $noreg, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: vwop_wv_vs2
@@ -261,8 +308,10 @@ body: |
     ; CHECK-LABEL: name: vwop_wv_vs2
     ; CHECK: %x:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: vwop_wv_vs2_incompatible_eew
@@ -271,8 +320,10 @@ body: |
     ; CHECK-LABEL: name: vwop_wv_vs2_incompatible_eew
     ; CHECK: %x:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: vwop_wv_vs2_incompatible_emul
@@ -281,8 +332,10 @@ body: |
     ; CHECK-LABEL: name: vwop_wv_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVWADD_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVWADD_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vwop_wv_vs1
@@ -291,8 +344,10 @@ body: |
     ; CHECK-LABEL: name: vwop_wv_vs1
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: vwop_wv_vs1_incompatible_eew
@@ -301,8 +356,10 @@ body: |
     ; CHECK-LABEL: name: vwop_wv_vs1_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: vwop_wv_vs1_incompatible_emul
@@ -311,8 +368,10 @@ body: |
     ; CHECK-LABEL: name: vwop_wv_vs1_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: tied_vwop_wv_vs1
@@ -321,8 +380,10 @@ body: |
     ; CHECK-LABEL: name: tied_vwop_wv_vs1
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: tied_vwop_wv_vs1_incompatible_eew
@@ -331,8 +392,10 @@ body: |
     ; CHECK-LABEL: name: tied_vwop_wv_vs1_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: tied_vwop_wv_vs1_incompatible_emul
@@ -341,8 +404,10 @@ body: |
     ; CHECK-LABEL: name: tied_vwop_wv_vs1_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: vop_vf2_vd
@@ -351,8 +416,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf2_vd
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF2_M1 $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF2_M1 $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf2_vd_incompatible_eew
@@ -361,8 +428,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf2_vd_incompatible_eew
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF2_M1 $noreg, $noreg, -1, 5 /* e32 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF2_M1 $noreg, $noreg, -1, 5 /* e32 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf2_vd_incompatible_emul
@@ -371,8 +440,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf2_vd_incompatible_emul
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF2_MF2 $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF2_MF2 $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf2_vs2
@@ -381,8 +452,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf2_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF2_M1 $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVZEXT_VF2_M1 $noreg, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf2_vs2_incompatible_eew
@@ -391,8 +464,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf2_vs2_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF2_M1 $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVZEXT_VF2_M1 $noreg, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf2_vs2_incompatible_emul
@@ -401,8 +476,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf2_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF2_M1 $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVZEXT_VF2_M1 $noreg, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf4_vd
@@ -411,8 +488,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf4_vd
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF4_M1 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF4_M1 $noreg, $noreg, -1, 5 /* e32 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf4_vd_incompatible_eew
@@ -421,8 +500,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf4_vd_incompatible_eew
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF4_M1 $noreg, $noreg, -1, 5 /* e32 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF4_M1 $noreg, $noreg, -1, 5 /* e32 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf4_vd_incompatible_emul
@@ -431,8 +512,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf4_vd_incompatible_emul
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF4_MF2 $noreg, $noreg, -1, 5 /* e32 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF4_MF2 $noreg, $noreg, -1, 5 /* e32 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf4_vs2
@@ -441,8 +524,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf4_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF4_M1 $noreg, %x, 1, 5 /* e32 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVZEXT_VF4_M1 $noreg, %x, 1, 5 /* e32 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf4_vs2_incompatible_eew
@@ -451,8 +536,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf4_vs2_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF4_M1 $noreg, %x, 1, 5 /* e32 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVZEXT_VF4_M1 $noreg, %x, 1, 5 /* e32 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf4_vs2_incompatible_emul
@@ -461,8 +548,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf4_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF4_M1 $noreg, %x, 1, 5 /* e32 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVZEXT_VF4_M1 $noreg, %x, 1, 5 /* e32 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf8_vd
@@ -471,8 +560,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf8_vd
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF8_M1 $noreg, $noreg, 1, 6 /* e64 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 6 /* e64 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF8_M1 $noreg, $noreg, -1, 6 /* e64 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 6 /* e64 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf8_vd_incompatible_eew
@@ -481,8 +572,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf8_vd_incompatible_eew
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF8_M1 $noreg, $noreg, -1, 6 /* e64 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF8_M1 $noreg, $noreg, -1, 6 /* e64 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf8_vd_incompatible_emul
@@ -491,8 +584,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf8_vd_incompatible_emul
     ; CHECK: early-clobber %x:vr = PseudoVZEXT_VF8_M1 $noreg, $noreg, -1, 6 /* e64 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 6 /* e64 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVZEXT_VF8_M1 $noreg, $noreg, -1, 6 /* e64 */, 0
     %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 6 /* e64 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf8_vs2
@@ -501,8 +596,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf8_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_MF8 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF8 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf8_vs2_incompatible_eew
@@ -511,8 +608,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf8_vs2_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_MF8 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF8 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vop_vf8_vs2_incompatible_emul
@@ -521,8 +620,10 @@ body: |
     ; CHECK-LABEL: name: vop_vf8_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vd
@@ -531,8 +632,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vd
     ; CHECK: early-clobber %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vd_unsupported_eew
@@ -541,8 +644,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vd_unsupported_eew
     ; CHECK: early-clobber %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vd_unsupported_emul
@@ -551,8 +656,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vd_unsupported_emul
     ; CHECK: %x:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vd_passthru_use
@@ -562,9 +669,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVNSRL_WV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVNSRL_WV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: vnop_wv_vd_passthru_use_incompatible_eew
@@ -574,9 +683,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVNSRL_WV_M1 %x, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVNSRL_WV_M1 %x, $noreg, $noreg, 1, 4 /* e16 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: vnop_wv_vd_passthru_use_unsupported_emul
@@ -586,9 +697,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVNSRL_WV_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: vnop_wv_vs2
@@ -597,8 +710,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVNSRL_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vs2_incompatible_eew
@@ -607,8 +722,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vs2_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVNSRL_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vs2_incompatible_emul
@@ -617,8 +734,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVNSRL_WV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vs1
@@ -627,8 +746,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vs1
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vs1_incompatible_eew
@@ -637,8 +758,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vs1_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vnop_wv_vs1_incompatible_emul
@@ -647,8 +770,10 @@ body: |
     ; CHECK-LABEL: name: vnop_wv_vs1_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vfnop_vs2
@@ -657,8 +782,10 @@ body: |
     ; CHECK-LABEL: name: vfnop_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVFNCVT_X_F_W_MF2 $noreg, %x, 0, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     early-clobber %y:vr = PseudoVFNCVT_X_F_W_MF2 $noreg, %x, 0, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vfnop_vs2_incompatible_eew
@@ -667,8 +794,10 @@ body: |
     ; CHECK-LABEL: name: vfnop_vs2_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVFNCVT_X_F_W_MF2 $noreg, %x, 0, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     early-clobber %y:vr = PseudoVFNCVT_X_F_W_MF2 $noreg, %x, 0, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vfnop_vs2_incompatible_emul
@@ -677,8 +806,10 @@ body: |
     ; CHECK-LABEL: name: vfnop_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVFNCVT_X_F_W_MF2 $noreg, %x, 0, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     early-clobber %y:vr = PseudoVFNCVT_X_F_W_MF2 $noreg, %x, 0, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vseN_v
@@ -737,8 +868,10 @@ body: |
     ; CHECK-LABEL: name: vleN_v
     ; CHECK: %x:vr = PseudoVLE8_V_M1 $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLE8_V_M1 $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vleN_v_incompatible_eew
@@ -747,8 +880,10 @@ body: |
     ; CHECK-LABEL: name: vleN_v_incompatible_eew
     ; CHECK: %x:vr = PseudoVLE8_V_M1 $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLE8_V_M1 $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vleN_v_incompatible_emul
@@ -757,8 +892,10 @@ body: |
     ; CHECK-LABEL: name: vleN_v_incompatible_emul
     ; CHECK: %x:vr = PseudoVLE8_V_M1 $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLE8_V_M1 $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vlm_v
@@ -767,8 +904,10 @@ body: |
     ; CHECK-LABEL: name: vlm_v
     ; CHECK: %x:vr = PseudoVLM_V_B8 $noreg, $noreg, 1, 0 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLM_V_B8 $noreg, $noreg, -1, 0, 0
     %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0
+    $v8 = COPY %y
 ...
 ---
 name: vlm_v_incompatible_eew
@@ -777,8 +916,10 @@ body: |
     ; CHECK-LABEL: name: vlm_v_incompatible_eew
     ; CHECK: %x:vr = PseudoVLM_V_B8 $noreg, $noreg, -1, 0 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLM_V_B8 $noreg, $noreg, -1, 0, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vlm_v_incompatible_emul
@@ -787,8 +928,10 @@ body: |
     ; CHECK-LABEL: name: vlm_v_incompatible_emul
     ; CHECK: %x:vr = PseudoVLM_V_B8 $noreg, $noreg, -1, 0 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLM_V_B8 $noreg, $noreg, -1, 0, 0
     %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0
+    $v8 = COPY %y
 ...
 ---
 name: vsseN_v
@@ -887,8 +1030,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_data
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_v_incompatible_eew
@@ -897,8 +1042,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_v_data_incompatible_emul
@@ -907,8 +1054,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_data_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVLUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_v_idx
@@ -917,8 +1066,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_idx
     ; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVLUXEI8_V_MF2_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVLUXEI8_V_MF2_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_v_idx_incompatible_eew
@@ -927,8 +1078,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_idx_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_v_idx_incompatible_emul
@@ -937,8 +1090,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_idx_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVLUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_v_vd
@@ -947,8 +1102,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_vd
     ; CHECK: %x:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_v_vd_incompatible_eew
@@ -957,8 +1114,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_v_vd_incompatible_eew
     ; CHECK: %x:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vluxeiN_vd_incompatible_emul
@@ -967,8 +1126,10 @@ body: |
     ; CHECK-LABEL: name: vluxeiN_vd_incompatible_emul
     ; CHECK: %x:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_mm
@@ -977,8 +1138,10 @@ body: |
     ; CHECK-LABEL: name: vmop_mm
     ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */
     ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
     %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_mm_incompatible_eew
@@ -987,8 +1150,10 @@ body: |
     ; CHECK-LABEL: name: vmop_mm_incompatible_eew
     ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e8 */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_mm_incompatible_emul
@@ -997,8 +1162,10 @@ body: |
     ; CHECK-LABEL: name: vmop_mm_incompatible_emul
     ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e8 */
     ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
     %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_mm_mask
@@ -1007,8 +1174,10 @@ body: |
     ; CHECK-LABEL: name: vmop_mm_mask
     ; CHECK: %x:vmv0 = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vmv0 = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
     %y:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_mm_mask_larger_emul_user
@@ -1017,8 +1186,10 @@ body: |
     ; CHECK-LABEL: name: vmop_mm_mask_larger_emul_user
     ; CHECK: %x:vmv0 = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */
     ; CHECK-NEXT: %y:vrm2nov0 = PseudoVADD_VV_M2_MASK $noreg, $noreg, $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vmv0 = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
     %y:vrm2nov0 = PseudoVADD_VV_M2_MASK $noreg, $noreg, $noreg, %x, 1, 4 /* e16 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: vmop_mm_mask_incompatible_emul
@@ -1027,8 +1198,10 @@ body: |
     ; CHECK-LABEL: name: vmop_mm_mask_incompatible_emul
     ; CHECK: %x:vmv0 = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e8 */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVADD_VV_MF2_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vmv0 = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
     %y:vrnov0 = PseudoVADD_VV_MF2_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv
@@ -1037,8 +1210,10 @@ body: |
     ; CHECK-LABEL: name: vmop_vv
     ; CHECK: %x:vr = PseudoVMSEQ_VV_M1 $noreg, $noreg, 1, 3 /* e8 */
     ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv_maskuser
@@ -1047,8 +1222,10 @@ body: |
     ; CHECK-LABEL: name: vmop_vv_maskuser
     ; CHECK: %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, 1, 3 /* e8 */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     %y:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv_maskuser_incompatible_eew
@@ -1057,8 +1234,10 @@ body: |
    ; CHECK-LABEL: name: vmop_vv_maskuser_incompatible_eew
     ; CHECK: %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     %y:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv_incompatible_emul
@@ -1067,8 +1246,10 @@ body: |
     ; CHECK-LABEL: name: vmop_vv_incompatible_emul
     ; CHECK: %x:vr = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv_maskuser_incompatible_emul
@@ -1077,8 +1258,10 @@ body: |
     ; CHECK-LABEL: name: vmop_vv_maskuser_incompatible_emul
     ; CHECK: %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVADD_VV_MF2_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     %y:vrnov0 = PseudoVADD_VV_MF2_MASK $noreg, $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv_maskuser_larger_emul
@@ -1087,8 +1270,10 @@ body: |
     ; CHECK-LABEL: name: vmop_vv_maskuser_larger_emul
     ; CHECK: %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, 1, 3 /* e8 */
     ; CHECK-NEXT: %y:vrm2nov0 = PseudoVADD_VV_M2_MASK $noreg, $noreg, $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8m2 = COPY %y
     %x:vmv0 = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     %y:vrm2nov0 = PseudoVADD_VV_M2_MASK $noreg, $noreg, $noreg, %x, 1, 4 /* e16 */, 0
+    $v8m2 = COPY %y
 ...
 ---
 name: vmop_vv_consumer_incompatible_eew
@@ -1097,8 +1282,10 @@ body: |
     ; CHECK-LABEL: name: vmop_vv_consumer_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMSEQ_VV_M1 $noreg, %x, 1, 4 /* e16 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMSEQ_VV_M1 $noreg, %x, 1, 4 /* e16 */
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv_consumer_incompatible_emul
@@ -1107,8 +1294,10 @@ body: |
     ; CHECK-LABEL: name: vmop_vv_consumer_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMSEQ_VV_MF2 $noreg, %x, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMSEQ_VV_MF2 $noreg, %x, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmop_vv_passthru_use
@@ -1118,9 +1307,11 @@ body: |
     ; CHECK: %x:vrnov0 = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMSEQ_VV_M1_MASK %x, $noreg, $noreg, $noreg, 1, 3 /* e8 */, 1 /* ta, mu */
     ; CHECK-NEXT: %z:vr = PseudoVMAND_MM_B8 %y, $noreg, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vrnov0 = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e1 */
     %y:vrnov0 = PseudoVMSEQ_VV_M1_MASK %x, $noreg, $noreg, $noreg, 1, 3 /* e8 */, 1
     %z:vr = PseudoVMAND_MM_B8 %y, $noreg, 1, 0 /* e1 */
+    $v8 = COPY %z
 ...
 ---
 name: vmop_vv_passthru_use_incompatible_eew
@@ -1130,9 +1321,11 @@ body: |
     ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMSEQ_VV_M1_MASK %x, $noreg, $noreg, $noreg, 1, 3 /* e8 */, 1 /* ta, mu */
     ; CHECK-NEXT: %z:vr = PseudoVMAND_MM_B8 %y, $noreg, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrnov0 = PseudoVMSEQ_VV_M1_MASK %x, $noreg, $noreg, $noreg, 1, 3 /* e8 */, 1
     %z:vr = PseudoVMAND_MM_B8 %y, $noreg, 1, 0 /* e1 */
+    $v8 = COPY %z
 ...
 ---
 name: vmop_vv_passthru_use_incompatible_emul
@@ -1142,9 +1335,11 @@ body: |
     ; CHECK: %x:vrnov0 = PseudoVMAND_MM_B16 $noreg, $noreg, -1, 0 /* e8 */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMSEQ_VV_M1_MASK %x, $noreg, $noreg, $noreg, 1, 3 /* e8 */, 1 /* ta, mu */
     ; CHECK-NEXT: %z:vr = PseudoVMAND_MM_B8 %y, $noreg, 1, 0 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vrnov0 = PseudoVMAND_MM_B16 $noreg, $noreg, -1, 0 /* e1 */
     %y:vrnov0 = PseudoVMSEQ_VV_M1_MASK %x, $noreg, $noreg, $noreg, 1, 3 /* e8 */, 1
     %z:vr = PseudoVMAND_MM_B8 %y, $noreg, 1, 0 /* e1 */
+    $v8 = COPY %z
 ...
 ---
 name: vmerge_vim
@@ -1153,8 +1348,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vim
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vim_incompatible_eew
@@ -1163,8 +1360,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vim_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vrnov0 = PseudoVMERGE_VIM_M1 $noreg, %x, 9, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vim_incompatible_emul
@@ -1173,8 +1372,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vim_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VIM_MF2 $noreg, %x, 9, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrnov0 = PseudoVMERGE_VIM_MF2 $noreg, %x, 9, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vxm
@@ -1183,8 +1384,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vxm
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vxm_incompatible_eew
@@ -1193,8 +1396,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vxm_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vrnov0 = PseudoVMERGE_VXM_M1 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vxm_incompatible_emul
@@ -1203,8 +1408,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vxm_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VXM_MF2 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrnov0 = PseudoVMERGE_VXM_MF2 $noreg, %x, $noreg, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vvm
@@ -1213,8 +1420,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vvm
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vvm_incompatible_eew
@@ -1223,8 +1432,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vvm_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmerge_vvm_incompatible_emul
@@ -1233,8 +1444,10 @@ body: |
     ; CHECK-LABEL: name: vmerge_vvm_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_MF2 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vrnov0 = PseudoVMERGE_VVM_MF2 $noreg, $noreg, %x, $v0, 1, 3 /* e8 */
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_i
@@ -1243,8 +1456,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_i
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_I_M1 %x, 9, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMV_V_I_M1 %x, 9, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_i_incompatible_eew
@@ -1253,8 +1468,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_i_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_I_M1 %x, 9, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVMV_V_I_M1 %x, 9, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_i_incompatible_emul
@@ -1263,8 +1480,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_i_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_I_MF2 %x, 9, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMV_V_I_MF2 %x, 9, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_x
@@ -1273,8 +1492,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_x
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_X_M1 %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMV_V_X_M1 %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_x_incompatible_eew
@@ -1283,8 +1504,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_x_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_X_M1 %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVMV_V_X_M1 %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_x_incompatible_emul
@@ -1293,8 +1516,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_x_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_X_MF2 %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMV_V_X_MF2 %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_v
@@ -1303,8 +1528,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_v
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_V_M1 $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMV_V_V_M1 $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_v_incompatible_eew
@@ -1313,8 +1540,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_v_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_V_M1 $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVMV_V_V_M1 $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vmv_v_v_incompatible_emul
@@ -1323,8 +1552,10 @@ body: |
     ; CHECK-LABEL: name: vmv_v_v_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMV_V_V_MF2 $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMV_V_V_MF2 $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: viota_m_dest
@@ -1333,8 +1564,10 @@ body: |
     ; CHECK-LABEL: name: viota_m_dest
     ; CHECK: early-clobber %x:vr = PseudoVIOTA_M_M1 $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVIOTA_M_M1 $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: viota_m_dest_incompatible_eew
@@ -1343,8 +1576,10 @@ body: |
     ; CHECK-LABEL: name: viota_m_dest_incompatible_eew
     ; CHECK: early-clobber %x:vr = PseudoVIOTA_M_M1 $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVIOTA_M_M1 $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: viota_m_dest_incompatible_emul
@@ -1353,8 +1588,10 @@ body: |
     ; CHECK-LABEL: name: viota_m_dest_incompatible_emul
     ; CHECK: early-clobber %x:vr = PseudoVIOTA_M_M1 $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVIOTA_M_M1 $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: viota_m_dest_passthru_use
@@ -1364,9 +1601,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVIOTA_M_M1 %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVIOTA_M_M1 %x, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: viota_m_dest_passthru_use_incompatible_eew
@@ -1376,9 +1615,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVIOTA_M_M1 %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVIOTA_M_M1 %x, $noreg, 1, 4 /* e16 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: viota_m_dest_passthru_use_incompatible_emul
@@ -1388,9 +1629,11 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVIOTA_M_MF2 %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVIOTA_M_MF2 %x, $noreg, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %z
 ...
 ---
 name: viota_m_mask
@@ -1399,8 +1642,10 @@ body: |
     ; CHECK-LABEL: name: viota_m_mask
     ; CHECK: %x:vr = PseudoVMSEQ_VV_M1 $noreg, $noreg, 1, 3 /* e8 */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVIOTA_M_M1 $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMSEQ_VV_M1 $noreg, $noreg, -1, 3 /* e8 */
     %y:vr = PseudoVIOTA_M_M1 $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: viota_m_mask_scale_mask
@@ -1409,8 +1654,10 @@ body: |
     ; CHECK-LABEL: name: viota_m_mask_scale_mask
     ; CHECK: early-clobber %x:vr = PseudoVMSEQ_VV_M2 $noreg, $noreg, 1, 4 /* e16 */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVIOTA_M_M1 $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMSEQ_VV_M2 $noreg, $noreg, -1, 4 /* e16 */
     %y:vr = PseudoVIOTA_M_M1 $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: viota_m_mask_incompatible_emul_from_sew
@@ -1419,8 +1666,10 @@ body: |
     ; CHECK-LABEL: name: viota_m_mask_incompatible_emul_from_sew
     ; CHECK: %x:vr = PseudoVMAND_MM_B1 $noreg, $noreg, -1, 0 /* e8 */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVIOTA_M_M1 $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMAND_MM_B1 $noreg, $noreg, -1, 0
     %y:vr = PseudoVIOTA_M_M1 $noreg, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: viota_m_mask_incompatible_emul_from_lmul
@@ -1429,8 +1678,10 @@ body: |
     ; CHECK-LABEL: name: viota_m_mask_incompatible_emul_from_lmul
     ; CHECK: %x:vr = PseudoVMAND_MM_B1 $noreg, $noreg, -1, 0 /* e8 */
     ; CHECK-NEXT: early-clobber %y:vr = PseudoVIOTA_M_MF2 $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVMAND_MM_B1 $noreg, $noreg, -1, 0
     %y:vr = PseudoVIOTA_M_MF2 $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vred_vs2
@@ -1439,8 +1690,10 @@ body: |
     ; CHECK-LABEL: name: vred_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vred_vs1
@@ -1449,8 +1702,10 @@ body: |
     ; CHECK-LABEL: name: vred_vs1
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vred_vs1_vs2
@@ -1459,8 +1714,10 @@ body: |
     ; CHECK-LABEL: name: vred_vs1_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vred_vs1_vs2_incompatible_eew
@@ -1469,8 +1726,10 @@ body: |
     ; CHECK-LABEL: name: vred_vs1_vs2_incompatible_eew
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_M1_E8 $noreg, %x, %x, 1, 4 /* e16 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vred_vs1_vs2_incompatible_emul
@@ -1479,8 +1738,10 @@ body: |
     ; CHECK-LABEL: name: vred_vs1_vs2_incompatible_emul
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDAND_VS_MF2_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDAND_VS_MF2_E8 $noreg, %x, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vred_other_user_is_vl0
@@ -1490,9 +1751,13 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
+    ; CHECK-NEXT: $v9 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
+    $v8 = COPY %y
+    $v9 = COPY %z
 ...
 ---
 name: vred_both_vl0
@@ -1502,9 +1767,13 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
+    ; CHECK-NEXT: $v9 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
+    $v8 = COPY %y
+    $v9 = COPY %z
 ...
 ---
 name: vred_vl0_and_vlreg
@@ -1515,10 +1784,14 @@ body: |
     ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, %vl, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
+    ; CHECK-NEXT: $v9 = COPY %z
     %vl:gprnox0 = COPY $x1
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, %vl, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 0, 3 /* e8 */, 0
+    $v8 = COPY %y
+    $v9 = COPY %z
 ...
 ---
 name: vred_vlreg_and_vl0
@@ -1529,10 +1802,14 @@ body: |
     ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
+    ; CHECK-NEXT: $v9 = COPY %z
     %vl:gprnox0 = COPY $x1
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 0, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0
+    $v8 = COPY %y
+    $v9 = COPY %z
 ...
 ---
 name: vred_other_user_is_vl2
@@ -1542,9 +1819,13 @@ body: |
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
+    ; CHECK-NEXT: $v9 = COPY %z
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
     %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0
+    $v8 = COPY %y
+    $v9 = COPY %z
 ...
 ---
 name: vwred_vs2
@@ -1553,8 +1834,10 @@ body: |
     ; CHECK-LABEL: name: vwred_vs2
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
 ---
 name: vwred_vs1
@@ -1563,8 +1846,10 @@ body: |
     ; CHECK-LABEL: name: vwred_vs1
     ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: $v8 = COPY %y
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0
+    $v8 = COPY %y
 ...
--- name: vwred_vs1_incompatible_eew @@ -1573,8 +1858,10 @@ body: | ; CHECK-LABEL: name: vwred_vs1_incompatible_eew ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vwred_vs2_incompatible_eew @@ -1583,8 +1870,10 @@ body: | ; CHECK-LABEL: name: vwred_vs2_incompatible_eew ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVWREDSUM_VS_M1_E8 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vwred_incompatible_emul @@ -1593,8 +1882,10 @@ body: | ; CHECK-LABEL: name: vwred_incompatible_emul ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_MF2_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVWREDSUM_VS_MF2_E8 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vfred_vs2 @@ -1603,8 +1894,10 @@ body: | ; CHECK-LABEL: name: vfred_vs2 ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, 1, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 5 /* e32 */, 0 %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vfred_vs1 @@ -1613,8 +1906,10 @@ body: | ; CHECK-LABEL: name: vfred_vs1 ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, 1, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, $noreg, %x, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 5 /* e32 */, 0 %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, $noreg, %x, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vfred_vs1_vs2 @@ -1623,8 +1918,10 @@ body: | ; CHECK-LABEL: name: vfred_vs1_vs2 ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, 1, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 5 /* e32 */, 0 %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vfred_vs1_vs2_incompatible_eew @@ -1633,8 +1930,10 @@ body: | ; CHECK-LABEL: name: vfred_vs1_vs2_incompatible_eew ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 6 /* e64 */, 0 %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... 
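# Editorial sketch (assumed reasoning, not from this patch): vwredsum is a
# widening reduction, so per the RVV spec its destination and scalar operand
# are 2*SEW while its vector source is SEW. The *_incompatible_eew tests above
# feed an operand from a producer of the wrong element width, and the CHECK
# lines show the producer keeping VL = -1 (VLMAX): with mismatched EEWs the
# demanded element counts don't translate, so the optimizer bails.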
--- name: vfred_vs1_vs2_incompatible_emul @@ -1643,8 +1942,10 @@ body: | ; CHECK-LABEL: name: vfred_vs1_vs2_incompatible_emul ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVFREDMAX_VS_MF2_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 5 /* e32 */, 0 %y:vr = PseudoVFREDMAX_VS_MF2_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vwred_passthru_use @@ -1654,9 +1955,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_MF2_E8 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVWREDSUM_VS_MF2_E8 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %z ... --- name: vwred_passthru_use_incompatible_eew @@ -1666,9 +1969,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_MF2_E8 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVWREDSUM_VS_MF2_E8 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %z ... --- name: vwred_passthru_use_incompatible_emul @@ -1678,9 +1983,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVWREDSUM_VS_MF4_E8 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVWREDSUM_VS_MF4_E8 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_MF2 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %z ... --- name: vfirst_v @@ -1749,8 +2056,10 @@ body: | ; CHECK-LABEL: name: vmclr_m ; CHECK: %x:vr = PseudoVMCLR_M_B8 1, 0 /* e8 */ ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0 /* e8 */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVMCLR_M_B8 -1, 0 %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0 + $v8 = COPY %y ... --- name: vmclr_m_incompatible_eew @@ -1759,8 +2068,10 @@ body: | ; CHECK-LABEL: name: vmclr_m_incompatible_eew ; CHECK: %x:vr = PseudoVMCLR_M_B8 -1, 0 /* e8 */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVMCLR_M_B8 -1, 0 %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vmclr_m_incompatible_emul @@ -1769,8 +2080,10 @@ body: | ; CHECK-LABEL: name: vmclr_m_incompatible_emul ; CHECK: %x:vr = PseudoVMCLR_M_B8 -1, 0 /* e8 */ ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0 /* e8 */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVMCLR_M_B8 -1, 0 %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0 + $v8 = COPY %y ... 
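# Editorial sketch (assumed reasoning, not from this patch): the Bn suffix on
# mask pseudos such as VMCLR/VMSET/VMAND tracks the mask's effective EMUL (as I
# read it, the SEW/LMUL ratio). A B8 producer feeding a B16 consumer
# (incompatible_emul), or a mask result feeding an ordinary vector operand
# (incompatible_eew), is conservatively left at VL = -1.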
--- name: vmset_m @@ -1779,8 +2092,10 @@ body: | ; CHECK-LABEL: name: vmset_m ; CHECK: %x:vr = PseudoVMSET_M_B8 1, 0 /* e8 */ ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0 /* e8 */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVMSET_M_B8 -1, 0 %y:vr = PseudoVMAND_MM_B8 $noreg, %x, 1, 0 + $v8 = COPY %y ... --- name: vmset_m_incompatible_eew @@ -1789,8 +2104,10 @@ body: | ; CHECK-LABEL: name: vmset_m_incompatible_eew ; CHECK: %x:vr = PseudoVMSET_M_B8 -1, 0 /* e8 */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVMSET_M_B8 -1, 0 %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vmset_m_incompatible_emul @@ -1799,8 +2116,10 @@ body: | ; CHECK-LABEL: name: vmset_m_incompatible_emul ; CHECK: %x:vr = PseudoVMSET_M_B8 -1, 0 /* e8 */ ; CHECK-NEXT: %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0 /* e8 */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVMSET_M_B8 -1, 0 %y:vr = PseudoVMAND_MM_B16 $noreg, %x, 1, 0 + $v8 = COPY %y ... --- name: vrgatherei16_vv @@ -1811,6 +2130,7 @@ body: | ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ %x:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vrgatherei16_vv_incompatible_data_eew @@ -1821,6 +2141,7 @@ body: | ; CHECK-NEXT: early-clobber %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vrgatherei16_vv_incompatible_index_eew @@ -1831,6 +2152,7 @@ body: | ; CHECK-NEXT: early-clobber %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, %x, 1, 5 /* e32 */, 0 /* tu, mu */ %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, %x, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vrgatherei16_vv_incompatible_dest_emul @@ -1841,6 +2163,7 @@ body: | ; CHECK-NEXT: %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ %x:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 0 %y:vr = PseudoVADD_VV_MF2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... --- name: vrgatherei16_vv_incompatible_source_emul @@ -1851,6 +2174,7 @@ body: | ; CHECK-NEXT: early-clobber %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 0 %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 + $v8 = COPY %y ... 
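# Editorial sketch (assumed reasoning, not from this patch): per the RVV spec,
# vrgatherei16 always reads its index operand at EEW=16, so with data EEW=32 at
# LMUL=1 the index EMUL is (16/32)*1 = 1/2 -- hence the _M1_E32_MF2 pseudo
# suffix. The incompatible_* variants here change one of data EEW, index EEW,
# or EMUL at a time and expect the producer's VL to stay at VLMAX.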
--- name: vrgatherei16_vv_incompatible_index_emul @@ -1861,3 +2185,4 @@ body: | ; CHECK-NEXT: early-clobber %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, %x, 1, 5 /* e32 */, 0 /* tu, mu */ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, %x, 1, 5 /* e32 */, 0 + $v8 = COPY %y diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll index 823c2bb..cd282c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -1,50 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -riscv-enable-vl-optimizer \ -; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -riscv-enable-vl-optimizer \ -; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen) define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) { -; NOVLOPT-LABEL: different_imm_vl_with_ta: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v12 -; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: different_imm_vl_with_ta: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v10, v12 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: different_imm_vl_with_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5) %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4) ret <vscale x 4 x i32> %w } define <vscale x 4 x i32> @vlmax_and_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) { -; NOVLOPT-LABEL: vlmax_and_imm_vl_with_ta: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v10, v12 -; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; NOVLOPT-NEXT: vadd.vv v8, v8, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: vlmax_and_imm_vl_with_ta: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; VLOPT-NEXT: vadd.vv v8, v10, v12 -; VLOPT-NEXT: vadd.vv v8, v8, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: vlmax_and_imm_vl_with_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, 
m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1) %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4) ret <vscale x 4 x i32> %w @@ -126,22 +104,13 @@ define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <v ; We can propagate VL to a tail-undisturbed policy, provided none of its users ; are passthrus (i.e. read past VL). define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) { -; NOVLOPT-LABEL: different_imm_vl_with_tu: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; NOVLOPT-NEXT: vmv2r.v v14, v10 -; NOVLOPT-NEXT: vadd.vv v14, v10, v12 -; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; NOVLOPT-NEXT: vadd.vv v8, v14, v10 -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: different_imm_vl_with_tu: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; VLOPT-NEXT: vmv2r.v v14, v10 -; VLOPT-NEXT: vadd.vv v14, v10, v12 -; VLOPT-NEXT: vadd.vv v8, v14, v10 -; VLOPT-NEXT: ret +; CHECK-LABEL: different_imm_vl_with_tu: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vmv2r.v v14, v10 +; CHECK-NEXT: vadd.vv v14, v10, v12 +; CHECK-NEXT: vadd.vv v8, v14, v10 +; CHECK-NEXT: ret %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5) %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4) ret <vscale x 4 x i32> %w @@ -195,22 +164,13 @@ define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale } define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, ptr %p, iXLen %vl) { -; NOVLOPT-LABEL: optimize_ternary_use: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vzext.vf2 v14, v8 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vmadd.vv v14, v10, v12 -; NOVLOPT-NEXT: vse32.v v14, (a0) -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: optimize_ternary_use: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vzext.vf2 v14, v8 -; VLOPT-NEXT: vmadd.vv v14, v10, v12 -; VLOPT-NEXT: vse32.v v14, (a0) -; VLOPT-NEXT: ret +; CHECK-LABEL: optimize_ternary_use: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v8 +; CHECK-NEXT: vmadd.vv v14, v10, v12 +; CHECK-NEXT: vse32.v v14, (a0) +; CHECK-NEXT: ret %1 = zext <vscale x 4 x i16> %a to <vscale x 4 x i32> %2 = mul <vscale x 4 x i32> %b, %1 %3 = add <vscale x 4 x i32> %2, %c @@ -221,28 +181,16 @@ define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, ; This function has a copy between two vrm2 virtual registers, make sure we can ; reduce vl between it. 
define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c, ptr %p, iXLen %vl) { -; NOVLOPT-LABEL: fadd_fcmp_select_copy: -; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 -; NOVLOPT-NEXT: fmv.w.x fa5, zero -; NOVLOPT-NEXT: vmflt.vf v10, v8, fa5 -; NOVLOPT-NEXT: vmand.mm v10, v0, v10 -; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; NOVLOPT-NEXT: vse32.v v8, (a0) -; NOVLOPT-NEXT: vsm.v v10, (a0) -; NOVLOPT-NEXT: ret -; -; VLOPT-LABEL: fadd_fcmp_select_copy: -; VLOPT: # %bb.0: -; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; VLOPT-NEXT: vfadd.vv v8, v8, v8 -; VLOPT-NEXT: fmv.w.x fa5, zero -; VLOPT-NEXT: vmflt.vf v10, v8, fa5 -; VLOPT-NEXT: vmand.mm v10, v0, v10 -; VLOPT-NEXT: vse32.v v8, (a0) -; VLOPT-NEXT: vsm.v v10, (a0) -; VLOPT-NEXT: ret +; CHECK-LABEL: fadd_fcmp_select_copy: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: fmv.w.x fa5, zero +; CHECK-NEXT: vmflt.vf v10, v8, fa5 +; CHECK-NEXT: vmand.mm v10, v0, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: vsm.v v10, (a0) +; CHECK-NEXT: ret %fadd = fadd <vscale x 4 x float> %v, %v %fcmp = fcmp olt <vscale x 4 x float> %fadd, zeroinitializer %select = select <vscale x 4 x i1> %c, <vscale x 4 x i1> %fcmp, <vscale x 4 x i1> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index 9883351..60398cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -12,9 +12,11 @@ body: | ; CHECK-NEXT: %vl:gprnox0 = COPY $x1 ; CHECK-NEXT: %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVNSRL_WV_MF4 $noreg, %x, $noreg, %vl, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %vl:gprnox0 = COPY $x1 %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ %y:vr = PseudoVNSRL_WV_MF4 $noreg, %x, $noreg, %vl, 4 /* e16 */, 0 /* tu, mu */ + $v8 = COPY %y ... --- name: vredsum_vv_user @@ -28,10 +30,14 @@ body: | ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVREDSUM_VS_M1_E64 $noreg, %x, $noreg, -1, 6 /* e64 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y + ; CHECK-NEXT: $v9 = COPY %z %vl:gprnox0 = COPY $x1 %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 0 /* tu, mu */ %y:vr = PseudoVREDSUM_VS_M1_E64 $noreg, %x, $noreg, -1, 6 /* e64 */, 0 /* tu, mu */ %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 5 /* e32 */, 0 /* tu, mu */ + $v8 = COPY %y + $v9 = COPY %z ... --- name: use_largest_common_vl_imm_imm @@ -41,9 +47,13 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y + ; CHECK-NEXT: $v9 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 2, 3 /* e8 */, 0 + $v8 = COPY %y + $v9 = COPY %z ... 
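# Editorial sketch (assumed reasoning, not from this patch): when a def has
# several users, every demanded element must survive, so the producer shrinks
# only to the largest VL among its users. In use_largest_common_vl_imm_imm
# above, %y demands 1 element and %z demands 2, so the CHECK line shows %x
# reduced to 2 rather than 1:
#   %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 2, 3 /* e8 */, 0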
--- name: use_largest_common_vl_same_reg @@ -57,10 +67,14 @@ body: | ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, %vl, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y + ; CHECK-NEXT: $v9 = COPY %z %vl:gprnox0 = COPY $x1 %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0 + $v8 = COPY %y + $v9 = COPY %z ... --- name: use_largest_common_vl_diff_regs @@ -75,11 +89,15 @@ body: | ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl0, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y + ; CHECK-NEXT: $v9 = COPY %z %vl0:gprnox0 = COPY $x1 %vl1:gprnox0 = COPY $x2 %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl0, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl1, 3 /* e8 */, 0 + $v8 = COPY %y + $v9 = COPY %z ... --- name: use_largest_common_vl_imm_reg @@ -93,10 +111,14 @@ body: | ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y + ; CHECK-NEXT: $v9 = COPY %z %vl:gprnox0 = COPY $x1 %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, %vl, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y + $v9 = COPY %z ... --- name: use_largest_common_vl_imm_vlmax @@ -106,9 +128,13 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y + ; CHECK-NEXT: $v9 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 %z:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, -1, 3 /* e8 */, 0 + $v8 = COPY %y + $v9 = COPY %z ... --- name: vfcvt_x_f_v_nofpexcept @@ -117,8 +143,10 @@ body: | ; CHECK-LABEL: name: vfcvt_x_f_v_nofpexcept ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 3 /* e32 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... 
--- name: vfcvt_x_f_v_fpexcept @@ -127,8 +155,10 @@ body: | ; CHECK-LABEL: name: vfcvt_x_f_v_fpexcept ; CHECK: %x:vr = PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 3 /* e32 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 + $v8 = COPY %y ... --- name: vfncvtbf16_f_f_w_nofpexcept @@ -137,8 +167,10 @@ body: | ; CHECK-LABEL: name: vfncvtbf16_f_f_w_nofpexcept ; CHECK: early-clobber %x:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, $noreg, 7, 1, 4 /* e16 */, 0 /* tu, mu */, implicit $frm ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, $noreg, 7, -1, 4 /* e16 */, 0 /* tu, mu */, implicit $frm %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 + $v8 = COPY %y ... --- name: vfsqrt_nofpexcept @@ -147,8 +179,10 @@ body: | ; CHECK-LABEL: name: vfsqrt_nofpexcept ; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm + ; CHECK-NEXT: $v8 = COPY %y %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm + $v8 = COPY %y ... --- name: vfsqrt_fpexcept @@ -157,8 +191,10 @@ body: | ; CHECK-LABEL: name: vfsqrt_fpexcept ; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm + ; CHECK-NEXT: $v8 = COPY %y %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm + $v8 = COPY %y ... --- name: vfrsqrt7_nofpexcept @@ -167,8 +203,10 @@ body: | ; CHECK-LABEL: name: vfrsqrt7_nofpexcept ; CHECK: %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8m2 = COPY %y %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0 %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 + $v8m2 = COPY %y ... --- name: vfrsqrt7_fpexcept @@ -177,8 +215,10 @@ body: | ; CHECK-LABEL: name: vfrsqrt7_fpexcept ; CHECK: %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8m2 = COPY %y %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0 %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 + $v8m2 = COPY %y ... 
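# Editorial sketch (assumed reasoning, not from this patch): the vfcvt, vfsqrt,
# and vfrsqrt7 pairs above differ only in the nofpexcept flag. Shrinking VL
# changes which elements are processed and so could suppress an FP exception
# the program might observe; only instructions marked nofpexcept get their VL
# reduced.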
--- name: vwadd_tied_vs1 @@ -187,8 +227,10 @@ body: | ; CHECK-LABEL: name: vwadd_tied_vs1 ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: early-clobber %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8m2 = COPY %y %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vrm2 = PseudoVWADD_WV_M1_TIED $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8m2 = COPY %y ... --- name: crossbb @@ -202,11 +244,13 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: %a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %a2 ; CHECK-NEXT: PseudoRET ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: %b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %b2 ; CHECK-NEXT: PseudoRET ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -221,10 +265,12 @@ body: | bb.1: %a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %a2 PseudoRET bb.2: %b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %b2 PseudoRET bb.3: liveins: $x1 @@ -237,17 +283,21 @@ name: unreachable body: | ; CHECK-LABEL: name: unreachable ; CHECK: bb.0: - ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %x ; CHECK-NEXT: PseudoRET ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y ; CHECK-NEXT: PseudoRET bb.0: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %x PseudoRET bb.1: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %y PseudoRET ... --- @@ -259,9 +309,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z ... --- # Can't reduce %x because %y uses it as a passthru, and %y's inactive elements are demanded by %z @@ -272,9 +324,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z ... 
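# Editorial sketch (assumed reasoning, not from this patch): the two passthru
# tests above differ only in %z's AVL. %y executes with VL 1 and takes its
# remaining lanes from its passthru %x; with AVL 1, %z never reads those lanes,
# so %x can shrink to 1, but with AVL 2, %z reads lane 1 of %y, which comes
# from %x, so %x must keep VL = -1 (VLMAX).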
--- # Can reduce %x even though %y uses it as a passthru, because %y's inactive elements aren't demanded @@ -287,11 +341,13 @@ body: | ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %b %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %b ... --- # Can't reduce %x because %y uses it as a passthru, and %y's inactive elements are ultimately demanded in %b @@ -304,11 +360,13 @@ body: | ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %b %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %b ... --- name: vxsat_dead @@ -317,8 +375,10 @@ body: | ; CHECK-LABEL: name: vxsat_dead ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %y ... --- name: vxsat_not_dead @@ -327,8 +387,10 @@ body: | ; CHECK-LABEL: name: vxsat_not_dead ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %y %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %y ... --- name: copy @@ -338,9 +400,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = COPY %x ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vr = COPY %x %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z ... 
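# Editorial sketch (assumed reasoning, not from this patch): saturating ops set
# $vxsat if any processed element saturates, so reducing VL can change the
# observed flag. As the vxsat_dead/vxsat_not_dead pair above shows, the
# optimizer shrinks PseudoVSADDU only when its $vxsat def is dead.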
--- name: copy_multiple_users @@ -351,10 +415,14 @@ body: | ; CHECK-NEXT: %y:vr = COPY %x ; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z0 + ; CHECK-NEXT: $v9 = COPY %z1 %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vr = COPY %x %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z0 + $v9 = COPY %z1 ... --- name: copy_user_invalid_sew @@ -364,9 +432,11 @@ body: | ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = COPY %x ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ %y:vr = COPY %x %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + $v8 = COPY %z ... --- name: phi @@ -387,6 +457,7 @@ body: | ; CHECK-NEXT: bb.2: ; CHECK-NEXT: %y:vr = PHI %w, %bb.0, %x, %bb.1 ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z bb.0: %w:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ BNE $noreg, $noreg, %bb.2 @@ -395,6 +466,7 @@ body: | bb.2: %y:vr = PHI %w, %bb.0, %x, %bb.1 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z ... --- name: phi_user_invalid_sew @@ -415,6 +487,7 @@ body: | ; CHECK-NEXT: bb.2: ; CHECK-NEXT: %y:vr = PHI %w, %bb.0, %x, %bb.1 ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z bb.0: %w:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ BNE $noreg, $noreg, %bb.2 @@ -423,6 +496,7 @@ body: | bb.2: %y:vr = PHI %w, %bb.0, %x, %bb.1 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + $v8 = COPY %z ... --- name: phi_different_incoming_sew @@ -443,6 +517,7 @@ body: | ; CHECK-NEXT: bb.2: ; CHECK-NEXT: %y:vr = PHI %w, %bb.0, %x, %bb.1 ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z bb.0: %w:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ BNE $noreg, $noreg, %bb.2 @@ -451,6 +526,7 @@ body: | bb.2: %y:vr = PHI %w, %bb.0, %x, %bb.1 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z ... --- name: phi_cycle_direct @@ -467,12 +543,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %y:vr = PHI %x, %bb.0, %y, %bb.1 ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z ; CHECK-NEXT: PseudoBR %bb.1 bb.0: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ bb.1: %y:vr = PHI %x, %bb.0, %y, %bb.1 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z PseudoBR %bb.1 ... 
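# Editorial sketch (assumed reasoning, not from this patch): demanded VL is
# propagated through COPYs and PHIs, including PHI self-cycles as in
# phi_cycle_direct, but only when the SEWs line up; the *_invalid_sew and
# phi_different_incoming_sew cases above leave the producer at VLMAX.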
--- @@ -490,12 +568,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %y:vr = PHI %x, %bb.0, %z, %bb.1 ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v8 = COPY %z ; CHECK-NEXT: PseudoBR %bb.1 bb.0: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ bb.1: %y:vr = PHI %x, %bb.0, %z, %bb.1 %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + $v8 = COPY %z PseudoBR %bb.1 ... --- diff --git a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll index a14268a..4b9f9a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-enable-vl-optimizer \ +; RUN: llc -mtriple=riscv64 -mattr=+v \ ; RUN: -verify-machineinstrs -debug-only=riscv-vl-optimizer -o - 2>&1 %s | FileCheck %s ; REQUIRES: asserts diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll index e6a98c9..eb3422d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll @@ -2,4246 +2,3303 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i8> @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0 } - -define <vscale x 1 x i8> @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, 
i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i8> @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale x 2 x i8> @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i8> @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 4 x i8> 
@llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 4 x i8> @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i8> @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 8 x i8> @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale 
x 8 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 16 x i1>, i32, i32, i32) - -define <vscale x 16 x i8> @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlseg2e8.v v6, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 16 x i8> @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlseg2e8.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 32 x i1>, i32, i32, i32) - -define <vscale x 32 x i8> @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vlseg2e8.v v4, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 32 x i8> 
@llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 32 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 32 x i8> @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 32 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 32 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vlseg2e8.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 32 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 32 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0)
+; CHECK-NEXT:    vlseg3e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0)
+; CHECK-NEXT:    vlseg3e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0)
+; CHECK-NEXT:    vlseg3e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0)
+; CHECK-NEXT:    vlseg3e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v6, (a0)
+; CHECK-NEXT:    vlseg3e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0)
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0)
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0)
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0)
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v6, (a0)
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 16 x i8> @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0)
+; CHECK-NEXT:    vlseg5e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0)
+; CHECK-NEXT:    vlseg5e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0)
+; CHECK-NEXT:    vlseg5e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0)
+; CHECK-NEXT:    vlseg5e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0)
+; CHECK-NEXT:    vlseg6e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0)
+; CHECK-NEXT:    vlseg6e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0)
+; CHECK-NEXT:    vlseg6e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0)
+; CHECK-NEXT:    vlseg6e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0)
+; CHECK-NEXT:    vlseg7e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0)
+; CHECK-NEXT:    vlseg7e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0)
+; CHECK-NEXT:    vlseg7e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0)
+; CHECK-NEXT:    vlseg7e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0)
+; CHECK-NEXT:    vlseg8e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0)
+; CHECK-NEXT:    vlseg8e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0)
+; CHECK-NEXT:    vlseg8e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0)
+; CHECK-NEXT:    vlseg8e8.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i16> @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v6, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 8 x i16> @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i16> @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v4, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3)
@test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x i16> @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x i16> @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i16> @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x i16> @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale 
x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x i16> @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret 
target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x i16> @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x i16> @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i16> 
@test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x i16> @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x i16> @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v 
v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x i16> @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) 
@llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 4 x i16> @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 1 x i16> @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret 
target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 2 x i16> @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 4 x i16> @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) 
@test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 1 x i16> @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 2 x i16> @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 4 x i16> @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, 
i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 1 x i16> @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", 
<vscale x 4 x i8>, 8) %0 } - -define <vscale x 2 x i16> @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 4 x i16> @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i32> @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) 
{ +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i32> @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 2 x i32> @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail 
call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 4 x i32> @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i32> @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 8 x i32> @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 4 x i32> @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 4 x i32> @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 1 x i64> @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i64> @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 2 x i64> @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i64> @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 4 x i64> @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 1 x i64> @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i64> @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 2 x i64> @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 1 x i64> @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i64> @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 2 x i64> @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0)
+; CHECK-NEXT: vlseg5e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-define <vscale x 1 x i64> @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0)
+; CHECK-NEXT: vlseg6e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-define <vscale x 1 x i64> @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0)
+; CHECK-NEXT: vlseg7e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-define <vscale x 1 x i64> @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0)
+; CHECK-NEXT: vlseg8e64.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 1 x i64> @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i64> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-
-define <vscale x 1 x half> @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-define <vscale x 1 x half> @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-
-define <vscale x 2 x half> @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-define <vscale x 2 x half> @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-
-define <vscale x 4 x half> @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 4 x half> @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-
-define <vscale x 8 x half> @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 8 x half> @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-
-define <vscale x 16 x half> @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 16 x half> @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-
-define <vscale x 1 x half> @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-define <vscale x 1 x half> @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr
%base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - - -define <vscale x 2 x half> @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x half> @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 4 x half> @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x half> @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: 
test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 8 x half> @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x half> @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x half> @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) 
- ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x half> @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - - -define <vscale x 2 x half> @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x half> @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 4 x half> @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) 
; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x half> @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 8 x half> @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x half> @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x half> 
@test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x half> @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - - -define <vscale x 2 x half> @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x half> @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) 
@llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - - -define <vscale x 4 x half> @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 4 x half> @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x half> @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 1 x half> @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) 
@test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - - -define <vscale x 2 x half> @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 2 x half> @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - - -define <vscale x 4 x half> @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> 
@llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 4 x half> @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x half> @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 1 x half> @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - - -define <vscale x 2 x half> @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 2 x half> @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - - -define <vscale x 4 x half> @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 4 x half> @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x half> %1 + 
ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x half> @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 1 x half> @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - - -define <vscale x 2 x half> @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 2 x half> @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", 
<vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - - -define <vscale x 4 x half> @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 4 x half> @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x float> @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 1 x float> @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) 
@test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - - -define <vscale x 2 x float> @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 2 x float> @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 4 x float> @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg2e32.v v6, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 4 x float> 
@llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 4 x float> @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 8 x float> @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vlseg2e32.v v4, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 8 x float> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 8 x float> @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 8 x float> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 1 x float> @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: 
test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 1 x float> @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 2 x float> @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 2 x float> @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 2 x float> 
@llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-
-define <vscale x 4 x float> @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 4 x float> @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-
-define <vscale x 1 x float> @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 1 x float> @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-
-define <vscale x 2 x float> @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 2 x float> @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-
-define <vscale x 4 x float> @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v6, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 4 x float> @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-
-define <vscale x 1 x float> @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0)
+; CHECK-NEXT:    vlseg5e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-define <vscale x 1 x float> @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-
-define <vscale x 2 x float> @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0)
+; CHECK-NEXT:    vlseg5e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-define <vscale x 2 x float> @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-
-define <vscale x 1 x float> @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0)
+; CHECK-NEXT:    vlseg6e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-define <vscale x 1 x float> @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-
-define <vscale x 2 x float> @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0)
+; CHECK-NEXT:    vlseg6e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-define <vscale x 2 x float> @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-
-define <vscale x 1 x float> @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0)
+; CHECK-NEXT:    vlseg7e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-define <vscale x 1 x float> @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-
-define <vscale x 2 x float> @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0)
+; CHECK-NEXT:    vlseg7e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-define <vscale x 2 x float> @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-
-define <vscale x 1 x float> @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0)
+; CHECK-NEXT:    vlseg8e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-define <vscale x 1 x float> @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-
-define <vscale x 2 x float> @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0)
+; CHECK-NEXT:    vlseg8e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 2 x float> @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-
-define <vscale x 1 x double> @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v7, (a0)
+; CHECK-NEXT:    vlseg2e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 1 x double> @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-
-define <vscale x 2 x double> @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v6, (a0)
+; CHECK-NEXT:    vlseg2e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 2 x double> @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg2e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-
-define <vscale x 4 x double> @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v4, (a0)
+; CHECK-NEXT:    vlseg2e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 4 x double> @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-
-define <vscale x 1 x double> @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v7, (a0)
+; CHECK-NEXT:    vlseg3e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 1 x double> @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-
-define <vscale x 2 x double> @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v6, (a0)
+; CHECK-NEXT:    vlseg3e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 2 x double> @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-
-define <vscale x 1 x double> @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v7, (a0)
+; CHECK-NEXT:    vlseg4e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 1 x double> @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-
-define <vscale x 2 x double> @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v6, (a0)
+; CHECK-NEXT:    vlseg4e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 2 x double> @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg4e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-
-define <vscale x 1 x double> @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg5e64.v v7, (a0)
+; CHECK-NEXT:    vlseg5e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-define <vscale x 1 x double> @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg5e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-
-define <vscale x 1 x double> @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg6e64.v v7, (a0)
+; CHECK-NEXT:    vlseg6e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-define <vscale x 1 x double> @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-
-define <vscale x 1 x double> @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg7e64.v v7, (a0)
+; CHECK-NEXT:    vlseg7e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-define <vscale x 1 x double> @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-
-define <vscale x 1 x double> @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg8e64.v v7, (a0)
+; CHECK-NEXT:    vlseg8e64.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 1 x double> @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-
-define <vscale x 1 x bfloat> @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-define <vscale x 1 x bfloat> @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-
-define <vscale x 2 x bfloat> @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-define <vscale x 2 x bfloat> @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-
-define <vscale x 4 x bfloat> @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 4 x bfloat> @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-
-define <vscale x 8 x bfloat> @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v6, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 8 x bfloat> @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-
-define <vscale x 16 x bfloat> @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v4, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 16 x bfloat> @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-
-define <vscale x 1 x bfloat> @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-define <vscale x 1 x bfloat> @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-
-define <vscale x 2 x bfloat> @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 2 x bfloat> @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-
-define <vscale x 4 x bfloat> @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 4 x bfloat> @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-
-define <vscale x 8 x bfloat> @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v6, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 8 x bfloat> @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-
-define <vscale x 1 x bfloat> @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-define <vscale x 1 x bfloat> @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-
-define <vscale x 2 x bfloat> @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 2 x bfloat> @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-
-define <vscale x 4 x bfloat> @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 4 x bfloat> @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-
-define <vscale x 8 x bfloat> @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v6, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 8 x bfloat> @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask)
{ ; CHECK-LABEL: test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x bfloat> @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> 
@llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x bfloat> @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 4 x bfloat> @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: 
test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 1 x bfloat> @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 2 x bfloat> @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> 
@llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 4 x bfloat> @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 1 x bfloat> @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 2 x bfloat> @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x 
i8>, 7) %0 } - -define <vscale x 4 x bfloat> @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 1 x bfloat> @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 2 x bfloat> @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 4 x bfloat> @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll 
index 16e5e7b9..faeabaf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
@@ -2,4330 +2,3373 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i8> @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 16 x i8> @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 32 x i1>, i64, i64, i64)
-
-define <vscale x 32 x i8> @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 32 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 32 x i8> @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 32 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 32 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 32 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 32 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i8> @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4)
@llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 16 x i1>, i64, i64, i64) - -define <vscale x 16 x i8> @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlseg4e8.v v6, (a0) +; CHECK-NEXT: vlseg4e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 16 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 16 x i8> @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlseg4e8.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 16 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i8> @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: vlseg5e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0 } - -define <vscale x 1 x i8> @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> 
%mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0 } - -define <vscale x 1 x i8> @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: vlseg5e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i8> @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: vlseg5e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 2 x i8> @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; 
CHECK-LABEL: test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i8> @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: vlseg5e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 4 x i8> @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i8> @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 
%vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: vlseg5e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 8 x i8> @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i8> @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0 } - -define <vscale x 1 x i8> @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0 } - -define <vscale x 1 x i8> @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i8> @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 2 x i8> @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i8> @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 4 x i8> @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i8> @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, 
m1, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 8 x i8> @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i8> @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0 } - -define <vscale x 1 x i8> @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) 
@llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0 } - -define <vscale x 1 x i8> @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i8> @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 2 x i8> @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, 
i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i8> @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 4 x i8> @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i8> @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) 
@llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 8 x i8> @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i8> @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -define <vscale x 1 x i8> @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> 
@llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -define <vscale x 1 x i8> @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i8> @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 2 x i8> @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret 
target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i8> @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 4 x i8> @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i8> @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 8 x i8> 
@llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 8 x i8> @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i16> @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale x 1 x i16> @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) 
@llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i16> @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v6, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 8 x i16> @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i16> @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v4, (a0)
+; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 16 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i16> @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v6, (a0)
+; CHECK-NEXT:    vlseg3e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 8 x i16> @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i16> @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v6, (a0)
+; CHECK-NEXT:    vlseg4e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 8 x i16> @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg4e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0)
+; CHECK-NEXT:    vlseg5e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0)
+; CHECK-NEXT:    vlseg5e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0)
+; CHECK-NEXT:    vlseg5e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v6, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-define <vscale x 4 x i32> @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i32> @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v4, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 8 x i32> @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 4 x i32> @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 2
x i32> @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i32> @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg4e32.v v6, (a0) +; CHECK-NEXT: vlseg4e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 4 x i32> @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i32> @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { +define 
target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: vlseg5e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i32> @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: vlseg5e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 2 x i32> @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i32> @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: vlseg6e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i32> @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: vlseg6e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 5) - %1 = call 
<vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 2 x i32> @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i32> @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: vlseg7e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) 
-
-define <vscale x 2 x i32> @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x i32> @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x i32> @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 1 x i64> @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i64> @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 2 x i64> @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i64> @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 4 x i64> @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 1 x i64> @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i64> @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 2 x i64> @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 1 x i64> @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i64> @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 2 x i64> @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0)
+; CHECK-NEXT: vlseg5e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 1 x i64> @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0)
+; CHECK-NEXT: vlseg6e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 1 x i64> @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0)
+; CHECK-NEXT: vlseg7e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 1 x i64> @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0)
+; CHECK-NEXT: vlseg8e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 1 x i64> @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 1 x half> @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 2 x half> @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 4 x half> @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 8 x half> @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 16 x half> @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 16 x half> @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 1 x half> @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 2 x half> @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 4 x half> @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 8 x half> @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 1 x half> @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1,
e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - - -define <vscale x 2 x half> @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x half> @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 4 x half> @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x half> 
@test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 8 x half> @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x half> @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x half> @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) 
@llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x half> @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - - -define <vscale x 2 x half> @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x half> @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - - -define <vscale x 4 x half> @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x 
i8>, 5) @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 4 x half> @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x half> @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 1 x half> @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 
x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - - -define <vscale x 2 x half> @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 2 x half> @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - - -define <vscale x 4 x half> @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 4 x half> @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, 
ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x half> @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 1 x half> @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - - -define <vscale x 2 x half> @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 2 x half> 
@test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - - -define <vscale x 4 x half> @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 4 x half> @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x half> @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) 
@llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 1 x half> @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - - -define <vscale x 2 x half> @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 2 x half> @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - - -define <vscale x 4 x half> @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x 
i8>, 8) @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 4 x half> @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x float> @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 1 x float> @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale 
x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - - -define <vscale x 2 x float> @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 2 x float> @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 4 x float> @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg2e32.v v6, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 4 x float> @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 8 x float> @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vlseg2e32.v v4, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 8 x float> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 8 x float> @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 8 x float> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 1 x float> @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x 
i8>, 3) %0 } - -define <vscale x 1 x float> @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 2 x float> @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 2 x float> @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 4 x float> @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg3e32.v v6, (a0) +; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 4 x float> @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x float> @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg4e32.v v7, (a0) +; CHECK-NEXT: vlseg4e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 1 x float> @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 2 x float> 
@test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg4e32.v v7, (a0) +; CHECK-NEXT: vlseg4e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 2 x float> @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 4 x float> @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg4e32.v v6, (a0) +; CHECK-NEXT: vlseg4e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 4 x float> @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) 
@llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 4 x float> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x float> @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: vlseg5e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 1 x float> @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - - -define <vscale x 2 x float> @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: vlseg5e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 2 x float> @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) 
@test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x float> @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: vlseg6e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 1 x float> @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - - -define <vscale x 2 x float> @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: vlseg6e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x float> 
@llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 2 x float> @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x float> @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: vlseg7e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 1 x float> @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - - -define <vscale x 2 x float> @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: vlseg7e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 2 x float> @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x float> @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: vlseg8e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 1 x float> @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale 
x 1 x float> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - - -define <vscale x 2 x float> @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: vlseg8e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 2 x float> @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5) - %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 2 x float> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x double> @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg2e64.v v7, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 1 x double> @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 2 x double> @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg2e64.v v6, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 2 x double> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 2 x double> @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 2 x double> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 4 x double> @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vlseg2e64.v v4, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 4 x double> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 4 x double> @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) 
{ +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 4 x double> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 1 x double> @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg3e64.v v7, (a0) +; CHECK-NEXT: vlseg3e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 1 x double> @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 2 x double> @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg3e64.v v6, (a0) +; CHECK-NEXT: vlseg3e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) 
undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 2 x double> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 2 x double> @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 2 x double> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x double> @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg4e64.v v7, (a0) +; CHECK-NEXT: vlseg4e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 1 x double> @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 2 x double> @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr 
%base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg4e64.v v6, (a0) +; CHECK-NEXT: vlseg4e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 2 x double> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 2 x double> @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 2 x double> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x double> @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0) +; CHECK-NEXT: vlseg5e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 1 x double> @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> 
@llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x double> @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0) +; CHECK-NEXT: vlseg6e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 1 x double> @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x double> @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0) +; CHECK-NEXT: vlseg7e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 1 x double> @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, 
e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x double> @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0) +; CHECK-NEXT: vlseg8e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 1 x double> @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale 
x 1 x bfloat> @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 2 x bfloat> @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", 
<vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 4 x bfloat> @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 8 x bfloat> @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 16 x bfloat> 
@test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 16 x bfloat> @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -define <vscale x 1 x bfloat> @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) 
@llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x bfloat> @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x bfloat> @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) 
@test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x bfloat> @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = 
call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x bfloat> @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x bfloat> @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: 
test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x bfloat> @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x bfloat> @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x bfloat> 
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 1e2e779..2f2035b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -222,3 +222,14 @@ define <vscale x 1 x i64> @vleff_move_past_passthru(ptr %p, ptr %q, iXLen %avl)
%b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %vec, iXLen %avl)
ret <vscale x 1 x i64> %b
}
+
+define <vscale x 1 x i64> @vmerge(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 1 x i1> %m, iXLen %avl) {
+; CHECK-LABEL: vmerge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.riscv.vmerge.nxv1i64.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 1 x i1> %m, iXLen %avl)
+ %b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %a, iXLen %avl)
+ ret <vscale x 1 x i64> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index 6e106e5..9c3e96d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -152,3 +152,19 @@ body: |
%y:gpr = ADDI $x0, 1
%z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
...
+---
+name: vmerge_vvm
+body: |
+ bb.0:
+ liveins: $v8, $v0
+ ; CHECK-LABEL: name: vmerge_vvm
+ ; CHECK: liveins: $v8, $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, $noreg, %mask, 4, 5 /* e32 */
+ %passthru:vr = COPY $v8
+ %mask:vmv0 = COPY $v0
+ %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */
+ %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 23c0c82..2afb72f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -674,16 +674,20 @@ define <vscale x 2 x i32> @load_factor2_oneactive(ptr %ptr, i32 %evl) {
define <vscale x 2 x i32> @load_factor5_oneactive(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor5_oneactive:
; RV32: # %bb.0:
+; RV32-NEXT: addi a0, a0, 12
+; RV32-NEXT: li a2, 20
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; RV32-NEXT: vlseg5e32.v v5, (a0)
+; RV32-NEXT: vlse32.v v8, (a0), a2
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor5_oneactive:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: addi a0, a0, 12
; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: li a2, 20
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; RV64-NEXT: vlseg5e32.v v5, (a0)
+; RV64-NEXT: vlse32.v v8, (a0), a2
; RV64-NEXT: ret
%rvl = mul nuw i32 %evl, 5
%wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm.mir b/llvm/test/CodeGen/RISCV/rvv/vxrm.mir
index 2bac1ee..87787c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm.mir
@@ -13,7 +13,7 @@ body: |
; MIR-NEXT: {{ $}}
; MIR-NEXT: WriteVXRMImm 0, implicit-def $vxrm
; MIR-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype
-; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 undef $v8, killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vxrm, implicit $vl, implicit $vtype
+; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 undef renamable $v8, killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vxrm, implicit $vl, implicit $vtype
; MIR-NEXT: PseudoRET implicit $v8
; ASM-LABEL: verify_vxrm:
; ASM: # %bb.0:
@@ -24,6 +24,7 @@ body: |
%0:vr = COPY $v8
%1:vr = COPY $v9
%2:gprnox0 = COPY $x10
- renamable $v8 = PseudoVAADD_VV_MF8 undef $noreg, %0, %1, 0, %2, 3 /* e8 */, 0
+ %3:vr = PseudoVAADD_VV_MF8 undef $noreg, %0, %1, 0, %2, 3 /* e8 */, 0
+ $v8 = COPY %3
PseudoRET implicit $v8
...
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
index 32753ca..cd7f30d 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -716,92 +716,101 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, a6, a5
+; RV32I-NEXT: lbu a5, 8(a0)
+; RV32I-NEXT: lbu a6, 9(a0)
+; RV32I-NEXT: lbu t3, 10(a0)
+; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a4, a4, a3
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: or a3, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
-; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: or t0, t2, t1
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: lbu a6, 12(a0)
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t4, t4, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: or a6, t1, a6
+; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t2
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: mv t0, sp
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srli t3, a0, 3
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: andi a5, a0, 31
-; RV32I-NEXT: andi t3, t3, 12
-; RV32I-NEXT: xori a5, a5, 31
-; RV32I-NEXT: or a3, t1, a3
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: add t0, t0, t3
-; RV32I-NEXT: sw a4, 0(sp)
-; RV32I-NEXT: sw a3, 4(sp)
-; RV32I-NEXT: sw a6, 8(sp)
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: lw a1, 4(t0)
-; RV32I-NEXT: lw a3, 8(t0)
-; RV32I-NEXT: lw a4, 0(t0)
-; RV32I-NEXT: lw a6, 12(t0)
-; RV32I-NEXT: srl a7, a1, a0
-; RV32I-NEXT: slli t0, a3, 1
-; RV32I-NEXT: srl a4, a4, a0
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli t1, a6, 1
-; RV32I-NEXT: srl a0, a6, a0
-; RV32I-NEXT: sll a6, t0, a5
-; RV32I-NEXT: sll a1, a1, a5
-; RV32I-NEXT: sll a5, t1, a5
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: mv t2, sp
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, t0, a7
+; RV32I-NEXT: or a5, t3, a5
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: add a0, t2, a0
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a5, 8(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a7, a4, a1
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: srl a6, a6, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: srl a5, a5, a1
+; RV32I-NEXT: slli t1, a0, 1
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: sll a1, t0, a3
+; RV32I-NEXT: sll a4, a4, a3
+; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: or a1, a7, a1
+; RV32I-NEXT: or a4, a6, a4
+; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a0, a3, 16
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: srli a5, a3, 8
-; RV32I-NEXT: srli a7, a1, 16
-; RV32I-NEXT: srli t0, a1, 24
-; RV32I-NEXT: srli t1, a1, 8
-; RV32I-NEXT: srli t2, a6, 16
-; RV32I-NEXT: srli t3, a6, 24
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: srli a7, a4, 16
+; RV32I-NEXT: srli t0, a4, 24
+; RV32I-NEXT: srli t1, a4, 8
+; RV32I-NEXT: srli t2, a1, 16
+; RV32I-NEXT: srli t3, a1, 24
; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a6, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a0, a6, 8
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a0, a1, 8
+; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t1, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb t0, 3(a2)
-; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
@@ -943,93 +952,102 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, a6, a5
+; RV32I-NEXT: lbu a5, 8(a0)
+; RV32I-NEXT: lbu a6, 9(a0)
+; RV32I-NEXT: lbu t3, 10(a0)
+; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a4, a4, a3
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: or a3, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
-; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: or t0, t2, t1
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: lbu a6, 12(a0)
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t4, t4, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: or a6, t1, a6
+; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t2
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: addi t0, sp, 16
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srli t3, a0, 3
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: andi a5, a0, 31
-; RV32I-NEXT: andi t3, t3, 12
-; RV32I-NEXT: or a3, t1, a3
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: sub a7, t0, t3
-; RV32I-NEXT: sw a4, 16(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a6, 24(sp)
-; RV32I-NEXT: sw a1, 28(sp)
-; RV32I-NEXT: lw a1, 0(a7)
-; RV32I-NEXT: lw a3, 4(a7)
-; RV32I-NEXT: lw a4, 8(a7)
-; RV32I-NEXT: lw a6, 12(a7)
-; RV32I-NEXT: xori a5, a5, 31
-; RV32I-NEXT: sll a7, a3, a0
-; RV32I-NEXT: srli t0, a1, 1
-; RV32I-NEXT: sll a6, a6, a0
-; RV32I-NEXT: srli t1, a4, 1
-; RV32I-NEXT: sll a4, a4, a0
-; RV32I-NEXT: srli a3, a3, 1
-; RV32I-NEXT: sll a0, a1, a0
-; RV32I-NEXT: srl a1, t0, a5
-; RV32I-NEXT: srl t0, t1, a5
-; RV32I-NEXT: srl a3, a3, a5
-; RV32I-NEXT: srli a5, a0, 16
-; RV32I-NEXT: srli t1, a0, 24
-; RV32I-NEXT: srli t2, a0, 8
-; RV32I-NEXT: or a1, a7, a1
-; RV32I-NEXT: or a6, a6, t0
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: addi t2, sp, 16
; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: or a4, t0, a7
+; RV32I-NEXT: or a5, t3, a5
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: sub a0, t2, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
+; RV32I-NEXT: lw a6, 8(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: sll a7, a5, a1
+; RV32I-NEXT: srli t0, a4, 1
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: srli t1, a6, 1
+; RV32I-NEXT: sll a6, a6, a1
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: sll a1, a4, a1
+; RV32I-NEXT: srl a4, t0, a3
+; RV32I-NEXT: srl t0, t1, a3
+; RV32I-NEXT: srl a3, a5, a3
+; RV32I-NEXT: srli a5, a1, 16
+; RV32I-NEXT: srli t1, a1, 24
+; RV32I-NEXT: srli t2, a1, 8
+; RV32I-NEXT: or a4, a7, a4
+; RV32I-NEXT: or a0, a0, t0
+; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t2, 1(a2)
; RV32I-NEXT: sb a5, 2(a2)
; RV32I-NEXT: sb t1, 3(a2)
-; RV32I-NEXT: srli a0, a3, 16
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: srli a5, a3, 8
-; RV32I-NEXT: srli a7, a6, 16
-; RV32I-NEXT: srli t0, a6, 24
-; RV32I-NEXT: srli t1, a6, 8
-; RV32I-NEXT: srli t2, a1, 16
-; RV32I-NEXT: srli t3, a1, 24
+; RV32I-NEXT: srli a1, a3, 16
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: srli a7, a0, 16
+; RV32I-NEXT: srli t0, a0, 24
+; RV32I-NEXT: srli t1, a0, 8
+; RV32I-NEXT: srli t2, a4, 16
+; RV32I-NEXT: srli t3, a4, 24
; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a5, 9(a2)
-; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a0, a1, 8
-; RV32I-NEXT: sb a6, 12(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a1, 10(a2)
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a1, a4, 8
+; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t1, 13(a2)
; RV32I-NEXT: sb a7, 14(a2)
; RV32I-NEXT: sb t0, 15(a2)
-; RV32I-NEXT: sb a1, 4(a2)
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
@@ -1168,73 +1186,82 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 8(a0)
+; RV32I-NEXT: lbu t3, 9(a0)
+; RV32I-NEXT: lbu t4, 10(a0)
+; RV32I-NEXT: lbu t5, 11(a0)
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: mv t0, sp
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: srli a4, a0, 3
-; RV32I-NEXT: or a5, t1, a5
-; RV32I-NEXT: andi t1, a0, 31
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srai t3, t4, 31
-; RV32I-NEXT: andi a4, a4, 12
-; RV32I-NEXT: xori t1, t1, 31
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or a4, t3, a4
+; RV32I-NEXT: or t3, t5, t4
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t4, 0(a1)
+; RV32I-NEXT: lbu t5, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t4
+; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t5
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: mv a5, sp
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t2, a0, t2
+; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: sw t3, 16(sp)
-; RV32I-NEXT: sw t3, 20(sp)
-; RV32I-NEXT: sw t3, 24(sp)
-; RV32I-NEXT: sw t3, 28(sp)
-; RV32I-NEXT: add a4, t0, a4
+; RV32I-NEXT: or a4, t3, a4
+; RV32I-NEXT: or a7, t2, t0
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a0, 16(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: sw a0, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: sw a5, 4(sp)
-; RV32I-NEXT: sw a6, 8(sp)
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: lw a1, 4(a4)
-; RV32I-NEXT: lw a3, 8(a4)
-; RV32I-NEXT: lw a5, 0(a4)
-; RV32I-NEXT: lw a4, 12(a4)
-; RV32I-NEXT: srl a6, a1, a0
-; RV32I-NEXT: slli a7, a3, 1
-; RV32I-NEXT: srl a5, a5, a0
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli t0, a4, 1
-; RV32I-NEXT: sra a0, a4, a0
-; RV32I-NEXT: sll a4, a7, t1
-; RV32I-NEXT: sll a1, a1, t1
-; RV32I-NEXT: sll a7, t0, t1
+; RV32I-NEXT: sw a6, 4(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a7, 12(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: add a0, a5, a0
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a5, 8(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a7, a4, a1
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: srl a6, a6, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: srl a5, a5, a1
+; RV32I-NEXT: slli t1, a0, 1
+; RV32I-NEXT: sra a0, a0, a1
+; RV32I-NEXT: sll a1, t0, a3
+; RV32I-NEXT: sll a4, a4, a3
+; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
+; RV32I-NEXT: or a1, a7, a1
; RV32I-NEXT: or a4, a6, a4
-; RV32I-NEXT: or a1, a5, a1
-; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
@@ -1242,21 +1269,21 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: srli a5, a3, 24
; RV32I-NEXT: srli a6, a3, 8
-; RV32I-NEXT: srli a7, a1, 16
-; RV32I-NEXT: srli t0, a1, 24
-; RV32I-NEXT: srli t1, a1, 8
-; RV32I-NEXT: srli t2, a4, 16
-; RV32I-NEXT: srli t3, a4, 24
+; RV32I-NEXT: srli a7, a4, 16
+; RV32I-NEXT: srli t0, a4, 24
+; RV32I-NEXT: srli t1, a4, 8
+; RV32I-NEXT: srli t2, a1, 16
+; RV32I-NEXT: srli t3, a1, 24
; RV32I-NEXT: sb a3, 8(a2)
; RV32I-NEXT: sb a6, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a0, a4, 8
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: srli a0, a1, 8
+; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t1, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb t0, 3(a2)
-; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
@@ -1272,17 +1299,19 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_32bytes:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -144
-; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
@@ -1299,122 +1328,143 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: slli a5, a5, 16
-; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: slli s8, a5, 16
+; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
+; RV64I-NEXT: or a5, a4, a3
+; RV64I-NEXT: or a6, a6, s8
+; RV64I-NEXT: or a3, t0, a7
+; RV64I-NEXT: or a4, t2, t1
+; RV64I-NEXT: lbu s8, 20(a0)
+; RV64I-NEXT: lbu s9, 21(a0)
+; RV64I-NEXT: lbu s10, 22(a0)
+; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: lbu t5, 20(a0)
-; RV64I-NEXT: lbu t6, 21(a0)
-; RV64I-NEXT: lbu s8, 22(a0)
-; RV64I-NEXT: lbu s9, 23(a0)
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or a7, t4, t3
+; RV64I-NEXT: or t0, t6, t5
+; RV64I-NEXT: or t1, s1, s0
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: lbu t6, 24(a0)
+; RV64I-NEXT: lbu s0, 25(a0)
+; RV64I-NEXT: lbu s1, 26(a0)
+; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
-; RV64I-NEXT: or t1, s1, s0
-; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
-; RV64I-NEXT: lbu s0, 24(a0)
-; RV64I-NEXT: lbu s1, 25(a0)
-; RV64I-NEXT: lbu s2, 26(a0)
-; RV64I-NEXT: lbu s3, 27(a0)
-; RV64I-NEXT: slli t6, t6, 8
-; RV64I-NEXT: slli s8, s8, 16
-; RV64I-NEXT: slli s9, s9, 24
-; RV64I-NEXT: slli s1, s1, 8
-; RV64I-NEXT: or t5, t6, t5
-; RV64I-NEXT: or t6, s9, s8
-; RV64I-NEXT: or s0, s1, s0
-; RV64I-NEXT: lbu s1, 28(a0)
+; RV64I-NEXT: or t5, s9, s8
+; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
-; RV64I-NEXT: lbu a0, 0(a1)
+; RV64I-NEXT: slli s10, s10, 16
+; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: slli s0, s0, 8
+; RV64I-NEXT: slli s1, s1, 16
+; RV64I-NEXT: slli s2, s2, 24
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or a0, s11, s10
+; RV64I-NEXT: or t6, s0, t6
+; RV64I-NEXT: or s0, s2, s1
+; RV64I-NEXT: or s1, s4, s3
+; RV64I-NEXT: lbu s2, 0(a1)
+; RV64I-NEXT: lbu s3, 1(a1)
+; RV64I-NEXT: lbu s4, 2(a1)
+; RV64I-NEXT: lbu s7, 3(a1)
+; RV64I-NEXT: slli s5, s5, 16
+; RV64I-NEXT: slli s6, s6, 24
+; RV64I-NEXT: slli s3, s3, 8
+; RV64I-NEXT: slli s4, s4, 16
+; RV64I-NEXT: slli s7, s7, 24
+; RV64I-NEXT: or s5, s6, s5
+; RV64I-NEXT: or s2, s3, s2
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 5(a1)
+; RV64I-NEXT: lbu s6, 4(a1)
+; RV64I-NEXT: lbu s7, 6(a1)
+; RV64I-NEXT: lbu a1, 7(a1)
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or s4, s4, s6
+; RV64I-NEXT: slli s7, s7, 16
+; RV64I-NEXT: slli a1, a1, 24
+; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: sd zero, 32(sp)
; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 48(sp)
; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: slli s2, s2, 16
-; RV64I-NEXT: slli s3, s3, 24
-; RV64I-NEXT: or a1, s3, s2
-; RV64I-NEXT: mv s2, sp
-; RV64I-NEXT: slli s4, s4, 8
-; RV64I-NEXT: slli s5, s5, 16
-; RV64I-NEXT: slli s6, s6, 24
-; RV64I-NEXT: or s1, s4, s1
-; RV64I-NEXT: srli s3, a0, 3
-; RV64I-NEXT: or s4, s6, s5
-; RV64I-NEXT: andi s5, a0, 63
-; RV64I-NEXT: andi s3, s3, 24
-; RV64I-NEXT: xori s5, s5, 63
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: or a1, a1, s0
-; RV64I-NEXT: or t1, s4, s1
-; RV64I-NEXT: add s2, s2, s3
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: slli a6, a6, 32
+; RV64I-NEXT: slli s8, a5, 16
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: slli t1, t1, 32
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: mv a6, sp
; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a1, t1, a1
+; RV64I-NEXT: or a4, t0, a7
+; RV64I-NEXT: or a7, t2, t1
+; RV64I-NEXT: or t0, t4, t3
+; RV64I-NEXT: or a0, a0, t5
+; RV64I-NEXT: or t1, s0, t6
+; RV64I-NEXT: or t2, s5, s1
+; RV64I-NEXT: or t3, s3, s2
+; RV64I-NEXT: or a1, a1, s4
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli t2, t2, 32
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: or a3, a3, a5
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a5, t2, t1
+; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a1, 24(sp)
-; RV64I-NEXT: ld a1, 8(s2)
-; RV64I-NEXT: ld a3, 16(s2)
-; RV64I-NEXT: ld a4, 0(s2)
-; RV64I-NEXT: ld a5, 24(s2)
-; RV64I-NEXT: srl a6, a1, a0
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: srl a4, a4, a0
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: srl a3, a3, a0
+; RV64I-NEXT: sd a0, 16(sp)
+; RV64I-NEXT: sd a5, 24(sp)
+; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: andi a3, a1, 63
+; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
+; RV64I-NEXT: add a0, a6, a0
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 0(a0)
+; RV64I-NEXT: ld a0, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: srl a5, a5, a0
-; RV64I-NEXT: sll a0, a7, s5
-; RV64I-NEXT: sll a1, a1, s5
-; RV64I-NEXT: sll a7, t0, s5
-; RV64I-NEXT: srli t0, a5, 56
-; RV64I-NEXT: srli t1, a5, 48
-; RV64I-NEXT: srli t2, a5, 40
-; RV64I-NEXT: srli t3, a5, 32
-; RV64I-NEXT: srli t4, a5, 24
-; RV64I-NEXT: srli t5, a5, 16
-; RV64I-NEXT: srli t6, a5, 8
-; RV64I-NEXT: or a0, a6, a0
-; RV64I-NEXT: or a1, a4, a1
-; RV64I-NEXT: or a3, a3, a7
+; RV64I-NEXT: srl a6, a6, a1
+; RV64I-NEXT: slli a4, a4, 1
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a0, 1
+; RV64I-NEXT: srl t2, a0, a1
+; RV64I-NEXT: sll a0, t0, a3
+; RV64I-NEXT: sll a1, a4, a3
+; RV64I-NEXT: sll a3, t1, a3
+; RV64I-NEXT: srli a4, t2, 56
+; RV64I-NEXT: srli t0, t2, 48
+; RV64I-NEXT: srli t1, t2, 40
+; RV64I-NEXT: srli t3, t2, 32
+; RV64I-NEXT: srli t4, t2, 24
+; RV64I-NEXT: srli t5, t2, 16
+; RV64I-NEXT: srli t6, t2, 8
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: or a1, a6, a1
+; RV64I-NEXT: or a3, a5, a3
; RV64I-NEXT: sb t3, 28(a2)
-; RV64I-NEXT: sb t2, 29(a2)
-; RV64I-NEXT: sb t1, 30(a2)
-; RV64I-NEXT: sb t0, 31(a2)
-; RV64I-NEXT: sb a5, 24(a2)
+; RV64I-NEXT: sb t1, 29(a2)
+; RV64I-NEXT: sb t0, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: sb t2, 24(a2)
; RV64I-NEXT: sb t6, 25(a2)
; RV64I-NEXT: sb t5, 26(a2)
; RV64I-NEXT: sb t4, 27(a2)
@@ -1463,17 +1513,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a5, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 144
+; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_32bytes:
@@ -1498,55 +1550,67 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu a7, 3(a0)
; RV32I-NEXT: lbu a5, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
-; RV32I-NEXT: lbu t3, 6(a0)
-; RV32I-NEXT: lbu t6, 7(a0)
-; RV32I-NEXT: lbu s2, 8(a0)
-; RV32I-NEXT: lbu s3, 9(a0)
-; RV32I-NEXT: lbu s4, 10(a0)
-; RV32I-NEXT: lbu s5, 11(a0)
-; RV32I-NEXT: lbu s7, 12(a0)
-; RV32I-NEXT: lbu s8, 13(a0)
-; RV32I-NEXT: lbu s9, 14(a0)
-; RV32I-NEXT: lbu s10, 15(a0)
-; RV32I-NEXT: lbu s11, 16(a0)
-; RV32I-NEXT: lbu ra, 17(a0)
-; RV32I-NEXT: lbu t4, 18(a0)
-; RV32I-NEXT: lbu s0, 19(a0)
+; RV32I-NEXT: lbu t1, 6(a0)
+; RV32I-NEXT: lbu t2, 7(a0)
+; RV32I-NEXT: lbu t3, 8(a0)
+; RV32I-NEXT: lbu t4, 9(a0)
+; RV32I-NEXT: lbu t5, 10(a0)
+; RV32I-NEXT: lbu t6, 11(a0)
+; RV32I-NEXT: lbu s0, 12(a0)
+; RV32I-NEXT: lbu s2, 13(a0)
+; RV32I-NEXT: lbu s4, 14(a0)
+; RV32I-NEXT: lbu s5, 15(a0)
+; RV32I-NEXT: lbu s6, 16(a0)
+; RV32I-NEXT: lbu s7, 17(a0)
+; RV32I-NEXT: lbu s8, 18(a0)
+; RV32I-NEXT: lbu s9, 19(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a7, a6
-; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 21(a0)
-; RV32I-NEXT: lbu t5, 22(a0)
-; RV32I-NEXT: lbu s1, 23(a0)
+; RV32I-NEXT: lbu s10, 20(a0)
+; RV32I-NEXT: lbu s11, 21(a0)
+; RV32I-NEXT: lbu ra, 22(a0)
+; RV32I-NEXT: lbu a3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or a5, t0, a5
+; RV32I-NEXT: or a6, t2, t1
+; RV32I-NEXT: or a7, t4, t3
+; RV32I-NEXT: or t0, t6, t5
+; RV32I-NEXT: lbu s1, 24(a0)
+; RV32I-NEXT: lbu s3, 25(a0)
+; RV32I-NEXT: lbu t4, 26(a0)
+; RV32I-NEXT: lbu t5, 27(a0)
+; RV32I-NEXT: slli s2, s2, 8
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s5, s5, 24
-; RV32I-NEXT: or a5, t0, a5
-; RV32I-NEXT: or a6, t6, t3
-; RV32I-NEXT: or a7, s3, s2
-; RV32I-NEXT: or t0, s5, s4
-; RV32I-NEXT: lbu t3, 24(a0)
-; RV32I-NEXT: lbu s5, 25(a0)
-; RV32I-NEXT: lbu s6, 26(a0)
-; RV32I-NEXT: lbu t6, 27(a0)
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: slli ra, ra, 8
-; RV32I-NEXT: or s7, s8, s7
-; RV32I-NEXT: or s2, s10, s9
-; RV32I-NEXT: or s3, ra, s11
-; RV32I-NEXT: lbu s4, 28(a0)
-; RV32I-NEXT: lbu s8, 29(a0)
-; RV32I-NEXT: lbu s9, 30(a0)
-; RV32I-NEXT: lbu s10, 31(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, s5, s4
+; RV32I-NEXT: or t3, s7, s6
+; RV32I-NEXT: lbu t6, 28(a0)
+; RV32I-NEXT: lbu s4, 29(a0)
+; RV32I-NEXT: lbu s5, 30(a0)
+; RV32I-NEXT: lbu s6, 31(a0)
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli s9, s9, 24
+; RV32I-NEXT: slli s11, s11, 8
+; RV32I-NEXT: slli ra, ra, 16
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: or a0, s9, s8
+; RV32I-NEXT: or s0, s11, s10
+; RV32I-NEXT: or s2, a3, ra
+; RV32I-NEXT: lbu a3, 0(a1)
+; RV32I-NEXT: lbu s7, 1(a1)
+; RV32I-NEXT: lbu s8, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 56(sp)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 64(sp)
@@ -1555,90 +1619,89 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw zero, 48(sp)
; RV32I-NEXT: sw zero, 52(sp)
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or s1, s3, s1
+; RV32I-NEXT: addi s3, sp, 8
; RV32I-NEXT: slli t4, t4, 16
-; RV32I-NEXT: slli s0, s0, 24
-; RV32I-NEXT: or t4, s0, t4
-; RV32I-NEXT: addi s0, sp, 8
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: slli t5, t5, 16
-; RV32I-NEXT: slli s1, s1, 24
-; RV32I-NEXT: slli s5, s5, 8
-; RV32I-NEXT: slli s6, s6, 16
-; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli s4, s4, 8
+; RV32I-NEXT: slli s5, s5, 16
+; RV32I-NEXT: slli s6, s6, 24
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: or t5, s4, t6
+; RV32I-NEXT: or t6, s6, s5
+; RV32I-NEXT: or a3, s7, a3
+; RV32I-NEXT: or a1, a1, s8
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a4, a4, s4
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: or t0, a0, t3
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, t4, s1
+; RV32I-NEXT: or t3, t6, t5
+; RV32I-NEXT: or a0, a1, a3
+; RV32I-NEXT: sw t0, 24(sp)
+; RV32I-NEXT: sw t1, 28(sp)
+; RV32I-NEXT: sw t2, 32(sp)
+; RV32I-NEXT: sw t3, 36(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a6, 16(sp)
+; RV32I-NEXT: sw a7, 20(sp)
; RV32I-NEXT: srli a1, a0, 3
-; RV32I-NEXT: or t2, s1, t5
-; RV32I-NEXT: andi t5, a0, 31
-; RV32I-NEXT: or t3, s5, t3
-; RV32I-NEXT: or t6, t6, s6
-; RV32I-NEXT: or s1, s8, s4
-; RV32I-NEXT: or s4, s10, s9
-; RV32I-NEXT: andi s5, a1, 28
-; RV32I-NEXT: xori a1, t5, 31
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: or a6, s2, s7
-; RV32I-NEXT: or a7, t4, s3
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t1, t6, t3
-; RV32I-NEXT: or t2, s4, s1
-; RV32I-NEXT: add s0, s0, s5
-; RV32I-NEXT: sw a7, 24(sp)
-; RV32I-NEXT: sw t0, 28(sp)
-; RV32I-NEXT: sw t1, 32(sp)
-; RV32I-NEXT: sw t2, 36(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a5, 16(sp)
-; RV32I-NEXT: sw a6, 20(sp)
-; RV32I-NEXT: lw a3, 0(s0)
-; RV32I-NEXT: lw a4, 4(s0)
-; RV32I-NEXT: lw a5, 8(s0)
-; RV32I-NEXT: lw a6, 12(s0)
-; RV32I-NEXT: lw a7, 16(s0)
-; RV32I-NEXT: lw t0, 20(s0)
-; RV32I-NEXT: lw t1, 24(s0)
-; RV32I-NEXT: lw t2, 28(s0)
-; RV32I-NEXT: srl t3, a4, a0
-; RV32I-NEXT: slli t4, a5, 1
+; RV32I-NEXT: andi a3, a0, 31
+; RV32I-NEXT: andi a4, a1, 28
+; RV32I-NEXT: xori a1, a3, 31
+; RV32I-NEXT: add a4, s3, a4
+; RV32I-NEXT: lw a3, 0(a4)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a6, 8(a4)
+; RV32I-NEXT: lw a7, 12(a4)
+; RV32I-NEXT: lw t0, 16(a4)
+; RV32I-NEXT: lw t1, 20(a4)
+; RV32I-NEXT: lw t2, 24(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srl t3, a5, a0
+; RV32I-NEXT: slli t4, a6, 1
; RV32I-NEXT: srl a3, a3, a0 -; RV32I-NEXT: slli a4, a4, 1 -; RV32I-NEXT: srl t5, a6, a0 -; RV32I-NEXT: slli t6, a7, 1 -; RV32I-NEXT: srl a5, a5, a0 -; RV32I-NEXT: slli a6, a6, 1 -; RV32I-NEXT: srl s0, t0, a0 -; RV32I-NEXT: slli s1, t1, 1 -; RV32I-NEXT: srl a7, a7, a0 -; RV32I-NEXT: slli t0, t0, 1 -; RV32I-NEXT: srl t1, t1, a0 -; RV32I-NEXT: slli s2, t2, 1 +; RV32I-NEXT: slli a5, a5, 1 +; RV32I-NEXT: srl t5, a7, a0 +; RV32I-NEXT: slli t6, t0, 1 +; RV32I-NEXT: srl a6, a6, a0 +; RV32I-NEXT: slli a7, a7, 1 +; RV32I-NEXT: srl s0, t1, a0 +; RV32I-NEXT: slli s1, t2, 1 +; RV32I-NEXT: srl t0, t0, a0 +; RV32I-NEXT: slli t1, t1, 1 ; RV32I-NEXT: srl t2, t2, a0 +; RV32I-NEXT: slli s2, a4, 1 +; RV32I-NEXT: srl s3, a4, a0 ; RV32I-NEXT: sll a0, t4, a1 -; RV32I-NEXT: sll a4, a4, a1 -; RV32I-NEXT: sll t4, t6, a1 -; RV32I-NEXT: sll a6, a6, a1 -; RV32I-NEXT: sll t6, s1, a1 -; RV32I-NEXT: sll t0, t0, a1 -; RV32I-NEXT: sll s1, s2, a1 -; RV32I-NEXT: srli s2, t2, 24 -; RV32I-NEXT: srli s3, t2, 16 -; RV32I-NEXT: srli s4, t2, 8 +; RV32I-NEXT: sll a4, a5, a1 +; RV32I-NEXT: sll a5, t6, a1 +; RV32I-NEXT: sll a7, a7, a1 +; RV32I-NEXT: sll t4, s1, a1 +; RV32I-NEXT: sll t1, t1, a1 +; RV32I-NEXT: sll t6, s2, a1 +; RV32I-NEXT: srli s1, s3, 24 +; RV32I-NEXT: srli s2, s3, 16 +; RV32I-NEXT: srli s4, s3, 8 ; RV32I-NEXT: or a0, t3, a0 ; RV32I-NEXT: or a1, a3, a4 -; RV32I-NEXT: or a3, t5, t4 -; RV32I-NEXT: or a4, a5, a6 -; RV32I-NEXT: or a5, s0, t6 -; RV32I-NEXT: or a6, a7, t0 -; RV32I-NEXT: or a7, t1, s1 -; RV32I-NEXT: sb t2, 28(a2) +; RV32I-NEXT: or a3, t5, a5 +; RV32I-NEXT: or a4, a6, a7 +; RV32I-NEXT: or a5, s0, t4 +; RV32I-NEXT: or a6, t0, t1 +; RV32I-NEXT: or a7, t2, t6 +; RV32I-NEXT: sb s3, 28(a2) ; RV32I-NEXT: sb s4, 29(a2) -; RV32I-NEXT: sb s3, 30(a2) -; RV32I-NEXT: sb s2, 31(a2) +; RV32I-NEXT: sb s2, 30(a2) +; RV32I-NEXT: sb s1, 31(a2) ; RV32I-NEXT: srli t0, a7, 24 ; RV32I-NEXT: srli t1, a7, 16 ; RV32I-NEXT: srli t2, a7, 8 @@ -1712,17 +1775,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -144 -; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -160 +; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -1739,125 +1804,146 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; 
RV64I-NEXT: lbu s1, 13(a0) ; RV64I-NEXT: lbu s2, 14(a0) ; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 ; RV64I-NEXT: lbu s4, 16(a0) ; RV64I-NEXT: lbu s5, 17(a0) ; RV64I-NEXT: lbu s6, 18(a0) ; RV64I-NEXT: lbu s7, 19(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: slli s8, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or a5, a4, a3 +; RV64I-NEXT: or a6, a6, s8 +; RV64I-NEXT: or a3, t0, a7 +; RV64I-NEXT: or a4, t2, t1 +; RV64I-NEXT: lbu s8, 20(a0) +; RV64I-NEXT: lbu s9, 21(a0) +; RV64I-NEXT: lbu s10, 22(a0) +; RV64I-NEXT: lbu s11, 23(a0) ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t5, t5, 16 ; RV64I-NEXT: slli t6, t6, 24 -; RV64I-NEXT: or a5, t0, a7 -; RV64I-NEXT: or a6, t2, t1 -; RV64I-NEXT: or a7, t4, t3 -; RV64I-NEXT: or t0, t6, t5 -; RV64I-NEXT: lbu t5, 20(a0) -; RV64I-NEXT: lbu t6, 21(a0) -; RV64I-NEXT: lbu s8, 22(a0) -; RV64I-NEXT: lbu s9, 23(a0) ; RV64I-NEXT: slli s1, s1, 8 ; RV64I-NEXT: slli s2, s2, 16 ; RV64I-NEXT: slli s3, s3, 24 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t6, 24(a0) +; RV64I-NEXT: lbu s0, 25(a0) +; RV64I-NEXT: lbu s1, 26(a0) +; RV64I-NEXT: lbu s2, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s6, s6, 16 ; RV64I-NEXT: slli s7, s7, 24 -; RV64I-NEXT: or t1, s1, s0 -; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: slli s9, s9, 8 ; RV64I-NEXT: or t3, s5, s4 ; RV64I-NEXT: or t4, s7, s6 -; RV64I-NEXT: lbu s0, 24(a0) -; RV64I-NEXT: lbu s1, 25(a0) -; RV64I-NEXT: lbu s2, 26(a0) -; RV64I-NEXT: lbu s3, 27(a0) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s8, s8, 16 -; RV64I-NEXT: slli s9, s9, 24 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or t6, s9, s8 -; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu s1, 28(a0) +; RV64I-NEXT: or t5, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) ; RV64I-NEXT: lbu s4, 29(a0) ; RV64I-NEXT: lbu s5, 30(a0) ; RV64I-NEXT: lbu s6, 31(a0) -; RV64I-NEXT: lbu a0, 0(a1) +; RV64I-NEXT: slli s10, s10, 16 +; RV64I-NEXT: slli s11, s11, 24 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: slli s2, s2, 24 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or a0, s11, s10 +; RV64I-NEXT: or t6, s0, t6 +; RV64I-NEXT: or s0, s2, s1 +; RV64I-NEXT: or s1, s4, s3 +; RV64I-NEXT: lbu s2, 0(a1) +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) +; RV64I-NEXT: slli s5, s5, 16 +; RV64I-NEXT: slli s6, s6, 24 +; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: slli s4, s4, 16 +; RV64I-NEXT: slli s7, s7, 24 +; RV64I-NEXT: or s5, s6, s5 +; RV64I-NEXT: or s2, s3, s2 +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s6, 4(a1) +; RV64I-NEXT: lbu s7, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, s6 +; RV64I-NEXT: slli s7, s7, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, s7 ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: slli s2, s2, 16 -; RV64I-NEXT: slli s3, s3, 24 -; RV64I-NEXT: or a1, s3, s2 -; RV64I-NEXT: addi s2, sp, 32 -; RV64I-NEXT: slli s4, s4, 8 -; RV64I-NEXT: slli s5, s5, 16 -; RV64I-NEXT: slli s6, s6, 24 -; RV64I-NEXT: or s1, s4, s1 -; RV64I-NEXT: srli s3, a0, 3 -; RV64I-NEXT: or s4, s6, s5 -; RV64I-NEXT: 
andi s5, a0, 63 -; RV64I-NEXT: andi s3, s3, 24 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a5, t0, a7 -; RV64I-NEXT: or a6, t2, t1 -; RV64I-NEXT: or a7, t4, t3 -; RV64I-NEXT: or t0, t6, t5 -; RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: or t1, s4, s1 -; RV64I-NEXT: sub t2, s2, s3 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: slli t0, t0, 32 -; RV64I-NEXT: slli t1, t1, 32 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: addi a6, sp, 32 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a5, t0, a7 -; RV64I-NEXT: or a1, t1, a1 +; RV64I-NEXT: or a4, t0, a7 +; RV64I-NEXT: or a7, t2, t1 +; RV64I-NEXT: or t0, t4, t3 +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or t1, s0, t6 +; RV64I-NEXT: or t2, s5, s1 +; RV64I-NEXT: or t3, s3, s2 +; RV64I-NEXT: or a1, a1, s4 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: slli t2, t2, 32 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a5, t2, t1 +; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: sd a3, 32(sp) ; RV64I-NEXT: sd a4, 40(sp) -; RV64I-NEXT: sd a5, 48(sp) -; RV64I-NEXT: sd a1, 56(sp) -; RV64I-NEXT: ld a1, 0(t2) -; RV64I-NEXT: ld a3, 8(t2) -; RV64I-NEXT: ld a4, 16(t2) -; RV64I-NEXT: ld a5, 24(t2) -; RV64I-NEXT: xori a6, s5, 63 -; RV64I-NEXT: sll a7, a3, a0 -; RV64I-NEXT: srli t0, a1, 1 -; RV64I-NEXT: sll a5, a5, a0 -; RV64I-NEXT: srli t1, a4, 1 -; RV64I-NEXT: sll a4, a4, a0 -; RV64I-NEXT: srli a3, a3, 1 -; RV64I-NEXT: sll t2, a1, a0 -; RV64I-NEXT: srl a0, t0, a6 -; RV64I-NEXT: srl a1, t1, a6 -; RV64I-NEXT: srl a3, a3, a6 -; RV64I-NEXT: srli a6, t2, 56 -; RV64I-NEXT: srli t0, t2, 48 -; RV64I-NEXT: srli t1, t2, 40 -; RV64I-NEXT: srli t3, t2, 32 -; RV64I-NEXT: srli t4, t2, 24 -; RV64I-NEXT: srli t5, t2, 16 -; RV64I-NEXT: srli t6, t2, 8 -; RV64I-NEXT: or a0, a7, a0 -; RV64I-NEXT: or a1, a5, a1 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: sb t3, 4(a2) -; RV64I-NEXT: sb t1, 5(a2) -; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb a6, 7(a2) -; RV64I-NEXT: sb t2, 0(a2) -; RV64I-NEXT: sb t6, 1(a2) -; RV64I-NEXT: sb t5, 2(a2) -; RV64I-NEXT: sb t4, 3(a2) +; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: sd a5, 56(sp) +; RV64I-NEXT: srli a0, a1, 3 +; RV64I-NEXT: andi a3, a1, 63 +; RV64I-NEXT: andi a0, a0, 24 +; RV64I-NEXT: sub a0, a6, a0 +; RV64I-NEXT: ld a4, 0(a0) +; RV64I-NEXT: ld a5, 8(a0) +; RV64I-NEXT: ld a6, 16(a0) +; RV64I-NEXT: ld a0, 24(a0) +; RV64I-NEXT: xori a3, a3, 63 +; RV64I-NEXT: sll a7, a5, a1 +; RV64I-NEXT: srli t0, a4, 1 +; RV64I-NEXT: sll t1, a0, a1 +; RV64I-NEXT: srli a0, a6, 1 +; RV64I-NEXT: sll a6, a6, a1 +; RV64I-NEXT: srli a5, a5, 1 +; RV64I-NEXT: sll a4, a4, a1 +; RV64I-NEXT: srl a1, t0, a3 +; RV64I-NEXT: srl t0, a0, a3 +; RV64I-NEXT: srl a3, a5, a3 +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli t2, a4, 48 +; RV64I-NEXT: srli t3, a4, 40 +; RV64I-NEXT: srli t4, a4, 32 +; RV64I-NEXT: srli t5, a4, 24 +; RV64I-NEXT: srli t6, a4, 16 +; RV64I-NEXT: srli s0, a4, 8 +; RV64I-NEXT: or a0, a7, a1 +; RV64I-NEXT: or a1, t1, t0 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: sb t4, 4(a2) +; RV64I-NEXT: sb t3, 5(a2) +; RV64I-NEXT: sb t2, 6(a2) +; RV64I-NEXT: sb a5, 7(a2) +; RV64I-NEXT: sb a4, 0(a2) +; RV64I-NEXT: sb s0, 1(a2) +; RV64I-NEXT: sb t6, 2(a2) +; RV64I-NEXT: sb t5, 3(a2) ; RV64I-NEXT: srli a4, a3, 56 ; RV64I-NEXT: srli a5, a3, 48 ; RV64I-NEXT: srli a6, a3, 40 @@ -1903,17 +1989,19 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr 
%dst) nounwind { ; RV64I-NEXT: sb a1, 9(a2) ; RV64I-NEXT: sb a5, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 144 +; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 160 ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_32bytes: @@ -1938,55 +2026,67 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a7, 3(a0) ; RV32I-NEXT: lbu a5, 4(a0) ; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t3, 6(a0) -; RV32I-NEXT: lbu t6, 7(a0) -; RV32I-NEXT: lbu s2, 8(a0) -; RV32I-NEXT: lbu s3, 9(a0) -; RV32I-NEXT: lbu s4, 10(a0) -; RV32I-NEXT: lbu s5, 11(a0) -; RV32I-NEXT: lbu s7, 12(a0) -; RV32I-NEXT: lbu s8, 13(a0) -; RV32I-NEXT: lbu s9, 14(a0) -; RV32I-NEXT: lbu s10, 15(a0) -; RV32I-NEXT: lbu s11, 16(a0) -; RV32I-NEXT: lbu ra, 17(a0) -; RV32I-NEXT: lbu t4, 18(a0) -; RV32I-NEXT: lbu s0, 19(a0) +; RV32I-NEXT: lbu t1, 6(a0) +; RV32I-NEXT: lbu t2, 7(a0) +; RV32I-NEXT: lbu t3, 8(a0) +; RV32I-NEXT: lbu t4, 9(a0) +; RV32I-NEXT: lbu t5, 10(a0) +; RV32I-NEXT: lbu t6, 11(a0) +; RV32I-NEXT: lbu s0, 12(a0) +; RV32I-NEXT: lbu s2, 13(a0) +; RV32I-NEXT: lbu s4, 14(a0) +; RV32I-NEXT: lbu s5, 15(a0) +; RV32I-NEXT: lbu s6, 16(a0) +; RV32I-NEXT: lbu s7, 17(a0) +; RV32I-NEXT: lbu s8, 18(a0) +; RV32I-NEXT: lbu s9, 19(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a7, a7, 24 ; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: or a4, a7, a6 -; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 21(a0) -; RV32I-NEXT: lbu t5, 22(a0) -; RV32I-NEXT: lbu s1, 23(a0) +; RV32I-NEXT: lbu s10, 20(a0) +; RV32I-NEXT: lbu s11, 21(a0) +; RV32I-NEXT: lbu ra, 22(a0) +; RV32I-NEXT: lbu a3, 23(a0) ; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: slli t5, t5, 16 ; RV32I-NEXT: slli t6, t6, 24 -; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or a5, t0, a5 +; RV32I-NEXT: or a6, t2, t1 +; RV32I-NEXT: or a7, t4, t3 +; RV32I-NEXT: or t0, t6, t5 +; RV32I-NEXT: lbu s1, 24(a0) +; RV32I-NEXT: lbu s3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) +; RV32I-NEXT: slli s2, s2, 8 ; RV32I-NEXT: slli s4, s4, 16 ; RV32I-NEXT: slli s5, s5, 24 -; RV32I-NEXT: or a5, t0, a5 -; RV32I-NEXT: or a6, t6, t3 -; RV32I-NEXT: or a7, s3, s2 -; RV32I-NEXT: or t0, s5, s4 -; RV32I-NEXT: lbu t3, 24(a0) -; 
RV32I-NEXT: lbu s5, 25(a0) -; RV32I-NEXT: lbu s6, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: slli s9, s9, 16 -; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: slli ra, ra, 8 -; RV32I-NEXT: or s7, s8, s7 -; RV32I-NEXT: or s2, s10, s9 -; RV32I-NEXT: or s3, ra, s11 -; RV32I-NEXT: lbu s4, 28(a0) -; RV32I-NEXT: lbu s8, 29(a0) -; RV32I-NEXT: lbu s9, 30(a0) -; RV32I-NEXT: lbu s10, 31(a0) -; RV32I-NEXT: lbu a0, 0(a1) +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: or t1, s2, s0 +; RV32I-NEXT: or t2, s5, s4 +; RV32I-NEXT: or t3, s7, s6 +; RV32I-NEXT: lbu t6, 28(a0) +; RV32I-NEXT: lbu s4, 29(a0) +; RV32I-NEXT: lbu s5, 30(a0) +; RV32I-NEXT: lbu s6, 31(a0) +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: slli s9, s9, 24 +; RV32I-NEXT: slli s11, s11, 8 +; RV32I-NEXT: slli ra, ra, 16 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: or a0, s9, s8 +; RV32I-NEXT: or s0, s11, s10 +; RV32I-NEXT: or s2, a3, ra +; RV32I-NEXT: lbu a3, 0(a1) +; RV32I-NEXT: lbu s7, 1(a1) +; RV32I-NEXT: lbu s8, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) ; RV32I-NEXT: sw zero, 24(sp) ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 32(sp) @@ -1995,89 +2095,88 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw zero, 16(sp) ; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or s1, s3, s1 +; RV32I-NEXT: addi s3, sp, 40 ; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli s0, s0, 24 -; RV32I-NEXT: or t4, s0, t4 -; RV32I-NEXT: addi s0, sp, 40 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s1, s1, 24 -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: slli s6, s6, 16 -; RV32I-NEXT: slli t6, t6, 24 -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: slli s9, s9, 16 -; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t5, t5, 24 +; RV32I-NEXT: slli s4, s4, 8 +; RV32I-NEXT: slli s5, s5, 16 +; RV32I-NEXT: slli s6, s6, 24 +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t5, s4, t6 +; RV32I-NEXT: or t6, s6, s5 +; RV32I-NEXT: or a3, s7, a3 +; RV32I-NEXT: or a1, a1, s8 +; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a4, a4, s4 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or t0, a0, t3 +; RV32I-NEXT: or t1, s2, s0 +; RV32I-NEXT: or t2, t4, s1 +; RV32I-NEXT: or t3, t6, t5 +; RV32I-NEXT: or a0, a1, a3 +; RV32I-NEXT: sw t0, 56(sp) +; RV32I-NEXT: sw t1, 60(sp) +; RV32I-NEXT: sw t2, 64(sp) +; RV32I-NEXT: sw t3, 68(sp) +; RV32I-NEXT: sw a4, 40(sp) +; RV32I-NEXT: sw a5, 44(sp) +; RV32I-NEXT: sw a6, 48(sp) +; RV32I-NEXT: sw a7, 52(sp) ; RV32I-NEXT: srli a1, a0, 3 -; RV32I-NEXT: or t2, s1, t5 -; RV32I-NEXT: andi t5, a0, 31 -; RV32I-NEXT: or t3, s5, t3 -; RV32I-NEXT: or t6, t6, s6 -; RV32I-NEXT: or s1, s8, s4 -; RV32I-NEXT: or s4, s10, s9 -; RV32I-NEXT: andi s5, a1, 28 -; RV32I-NEXT: xori a1, t5, 31 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a6, s2, s7 -; RV32I-NEXT: or a7, t4, s3 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or t1, t6, t3 -; RV32I-NEXT: or t2, s4, s1 -; RV32I-NEXT: sub t3, s0, s5 -; RV32I-NEXT: sw a7, 56(sp) -; RV32I-NEXT: sw t0, 60(sp) -; RV32I-NEXT: sw t1, 64(sp) -; RV32I-NEXT: sw t2, 68(sp) -; RV32I-NEXT: sw a3, 40(sp) -; RV32I-NEXT: sw a4, 44(sp) -; RV32I-NEXT: sw a5, 48(sp) -; RV32I-NEXT: sw a6, 52(sp) -; RV32I-NEXT: lw a3, 0(t3) -; 
RV32I-NEXT: lw a4, 4(t3) -; RV32I-NEXT: lw a5, 8(t3) -; RV32I-NEXT: lw a6, 12(t3) -; RV32I-NEXT: lw a7, 16(t3) -; RV32I-NEXT: lw t0, 20(t3) -; RV32I-NEXT: lw t1, 24(t3) -; RV32I-NEXT: lw t2, 28(t3) -; RV32I-NEXT: sll t3, a4, a0 -; RV32I-NEXT: srli t4, a3, 1 -; RV32I-NEXT: sll t5, a6, a0 -; RV32I-NEXT: srli t6, a5, 1 -; RV32I-NEXT: sll a5, a5, a0 -; RV32I-NEXT: srli a4, a4, 1 -; RV32I-NEXT: sll s0, t0, a0 -; RV32I-NEXT: srli s1, a7, 1 -; RV32I-NEXT: sll a7, a7, a0 -; RV32I-NEXT: srli a6, a6, 1 +; RV32I-NEXT: andi a3, a0, 31 +; RV32I-NEXT: andi a4, a1, 28 +; RV32I-NEXT: xori a1, a3, 31 +; RV32I-NEXT: sub a3, s3, a4 +; RV32I-NEXT: lw a4, 0(a3) +; RV32I-NEXT: lw a5, 4(a3) +; RV32I-NEXT: lw a6, 8(a3) +; RV32I-NEXT: lw a7, 12(a3) +; RV32I-NEXT: lw t0, 16(a3) +; RV32I-NEXT: lw t1, 20(a3) +; RV32I-NEXT: lw t2, 24(a3) +; RV32I-NEXT: lw a3, 28(a3) +; RV32I-NEXT: sll t3, a5, a0 +; RV32I-NEXT: srli t4, a4, 1 +; RV32I-NEXT: sll t5, a7, a0 +; RV32I-NEXT: srli t6, a6, 1 +; RV32I-NEXT: sll a6, a6, a0 +; RV32I-NEXT: srli a5, a5, 1 +; RV32I-NEXT: sll s0, t1, a0 +; RV32I-NEXT: srli s1, t0, 1 +; RV32I-NEXT: sll t0, t0, a0 +; RV32I-NEXT: srli a7, a7, 1 +; RV32I-NEXT: sll s2, a3, a0 +; RV32I-NEXT: srli a3, t2, 1 ; RV32I-NEXT: sll t2, t2, a0 -; RV32I-NEXT: srli s2, t1, 1 -; RV32I-NEXT: sll t1, t1, a0 -; RV32I-NEXT: srli t0, t0, 1 -; RV32I-NEXT: sll s3, a3, a0 +; RV32I-NEXT: srli t1, t1, 1 +; RV32I-NEXT: sll s3, a4, a0 ; RV32I-NEXT: srl a0, t4, a1 -; RV32I-NEXT: srl a3, t6, a1 -; RV32I-NEXT: srl a4, a4, a1 +; RV32I-NEXT: srl a4, t6, a1 +; RV32I-NEXT: srl a5, a5, a1 ; RV32I-NEXT: srl t4, s1, a1 -; RV32I-NEXT: srl a6, a6, a1 -; RV32I-NEXT: srl t6, s2, a1 -; RV32I-NEXT: srl t0, t0, a1 +; RV32I-NEXT: srl a7, a7, a1 +; RV32I-NEXT: srl t6, a3, a1 +; RV32I-NEXT: srl t1, t1, a1 ; RV32I-NEXT: srli s1, s3, 24 -; RV32I-NEXT: srli s2, s3, 16 -; RV32I-NEXT: srli s4, s3, 8 +; RV32I-NEXT: srli s4, s3, 16 +; RV32I-NEXT: srli s5, s3, 8 ; RV32I-NEXT: or a0, t3, a0 -; RV32I-NEXT: or a1, t5, a3 -; RV32I-NEXT: or a3, a5, a4 +; RV32I-NEXT: or a1, t5, a4 +; RV32I-NEXT: or a3, a6, a5 ; RV32I-NEXT: or a4, s0, t4 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a6, t2, t6 -; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a5, t0, a7 +; RV32I-NEXT: or a6, s2, t6 +; RV32I-NEXT: or a7, t2, t1 ; RV32I-NEXT: sb s3, 0(a2) -; RV32I-NEXT: sb s4, 1(a2) -; RV32I-NEXT: sb s2, 2(a2) +; RV32I-NEXT: sb s5, 1(a2) +; RV32I-NEXT: sb s4, 2(a2) ; RV32I-NEXT: sb s1, 3(a2) ; RV32I-NEXT: srli t0, a7, 24 ; RV32I-NEXT: srli t1, a7, 16 @@ -2152,17 +2251,19 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -144 -; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -160 +; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 120(sp) # 8-byte 
Folded Spill +; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -2179,123 +2280,144 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu s1, 13(a0) ; RV64I-NEXT: lbu s2, 14(a0) ; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 ; RV64I-NEXT: lbu s4, 16(a0) ; RV64I-NEXT: lbu s5, 17(a0) ; RV64I-NEXT: lbu s6, 18(a0) ; RV64I-NEXT: lbu s7, 19(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a6, t2, t1 +; RV64I-NEXT: lbu s8, 20(a0) +; RV64I-NEXT: lbu s9, 21(a0) +; RV64I-NEXT: lbu s10, 22(a0) +; RV64I-NEXT: lbu s11, 23(a0) ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t5, t5, 16 ; RV64I-NEXT: slli t6, t6, 24 -; RV64I-NEXT: or a5, t0, a7 -; RV64I-NEXT: or a6, t2, t1 -; RV64I-NEXT: or a7, t4, t3 -; RV64I-NEXT: or t0, t6, t5 -; RV64I-NEXT: lbu t5, 20(a0) -; RV64I-NEXT: lbu t6, 21(a0) -; RV64I-NEXT: lbu s8, 22(a0) -; RV64I-NEXT: lbu s9, 23(a0) ; RV64I-NEXT: slli s1, s1, 8 ; RV64I-NEXT: slli s2, s2, 16 ; RV64I-NEXT: slli s3, s3, 24 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t6, 24(a0) +; RV64I-NEXT: lbu s0, 25(a0) +; RV64I-NEXT: lbu s1, 26(a0) +; RV64I-NEXT: lbu s2, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s6, s6, 16 ; RV64I-NEXT: slli s7, s7, 24 -; RV64I-NEXT: or t1, s1, s0 -; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: slli s9, s9, 8 ; RV64I-NEXT: or t3, s5, s4 ; RV64I-NEXT: or t4, s7, s6 -; RV64I-NEXT: lbu s0, 24(a0) -; RV64I-NEXT: lbu s1, 25(a0) -; RV64I-NEXT: lbu s2, 26(a0) -; RV64I-NEXT: lbu s3, 27(a0) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s8, s8, 16 -; RV64I-NEXT: slli s9, s9, 24 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or t6, s9, s8 -; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu s1, 28(a0) +; RV64I-NEXT: or t5, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) ; RV64I-NEXT: lbu s4, 29(a0) ; RV64I-NEXT: lbu s5, 30(a0) ; RV64I-NEXT: lbu s6, 31(a0) -; RV64I-NEXT: lbu a0, 0(a1) -; RV64I-NEXT: slli s2, s2, 16 -; RV64I-NEXT: slli s3, s3, 24 -; RV64I-NEXT: or a1, s3, s2 -; RV64I-NEXT: mv s2, sp +; RV64I-NEXT: slli s10, s10, 16 +; RV64I-NEXT: slli s11, s11, 24 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: slli s2, s2, 24 ; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or a0, s11, s10 +; RV64I-NEXT: or t6, s0, t6 +; RV64I-NEXT: or s0, s2, s1 +; RV64I-NEXT: or s1, s4, s3 +; RV64I-NEXT: lbu s2, 0(a1) +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) ; RV64I-NEXT: slli s5, s5, 16 ; RV64I-NEXT: slli s6, s6, 24 -; RV64I-NEXT: or s1, s4, s1 -; RV64I-NEXT: srli s3, a0, 3 -; RV64I-NEXT: or s4, s6, s5 -; RV64I-NEXT: andi s5, a0, 63 -; RV64I-NEXT: andi s3, s3, 24 -; RV64I-NEXT: xori s5, s5, 63 +; RV64I-NEXT: slli s3, 
s3, 8 +; RV64I-NEXT: slli s4, s4, 16 +; RV64I-NEXT: slli s7, s7, 24 +; RV64I-NEXT: or s5, s6, s5 +; RV64I-NEXT: or s2, s3, s2 +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s6, 4(a1) +; RV64I-NEXT: lbu s7, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, s6 +; RV64I-NEXT: slli s7, s7, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, s7 +; RV64I-NEXT: mv s6, sp ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 ; RV64I-NEXT: or a5, t0, a7 ; RV64I-NEXT: or a6, t2, t1 ; RV64I-NEXT: or a7, t4, t3 -; RV64I-NEXT: or t0, t6, t5 -; RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: or t1, s4, s1 -; RV64I-NEXT: add s2, s2, s3 +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or t0, s0, t6 +; RV64I-NEXT: or t1, s5, s1 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: or a1, a1, s4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: slli t0, t0, 32 -; RV64I-NEXT: slli t2, t1, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: slli t3, t1, 32 +; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: sraiw t1, t1, 31 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a5, t0, a7 -; RV64I-NEXT: or a1, t2, a1 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a5, t3, t0 +; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: sd t1, 32(sp) ; RV64I-NEXT: sd t1, 40(sp) ; RV64I-NEXT: sd t1, 48(sp) ; RV64I-NEXT: sd t1, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) ; RV64I-NEXT: sd a4, 8(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a1, 24(sp) -; RV64I-NEXT: ld a1, 8(s2) -; RV64I-NEXT: ld a3, 16(s2) -; RV64I-NEXT: ld a4, 0(s2) -; RV64I-NEXT: ld a5, 24(s2) -; RV64I-NEXT: srl a6, a1, a0 -; RV64I-NEXT: slli a7, a3, 1 -; RV64I-NEXT: srl a4, a4, a0 -; RV64I-NEXT: slli a1, a1, 1 -; RV64I-NEXT: srl a3, a3, a0 +; RV64I-NEXT: sd a0, 16(sp) +; RV64I-NEXT: sd a5, 24(sp) +; RV64I-NEXT: srli a0, a1, 3 +; RV64I-NEXT: andi a3, a1, 63 +; RV64I-NEXT: andi a0, a0, 24 +; RV64I-NEXT: xori a3, a3, 63 +; RV64I-NEXT: add a0, s6, a0 +; RV64I-NEXT: ld a4, 8(a0) +; RV64I-NEXT: ld a5, 16(a0) +; RV64I-NEXT: ld a6, 0(a0) +; RV64I-NEXT: ld a0, 24(a0) +; RV64I-NEXT: srl a7, a4, a1 ; RV64I-NEXT: slli t0, a5, 1 -; RV64I-NEXT: sra a5, a5, a0 -; RV64I-NEXT: sll a0, a7, s5 -; RV64I-NEXT: sll a1, a1, s5 -; RV64I-NEXT: sll a7, t0, s5 -; RV64I-NEXT: srli t0, a5, 56 -; RV64I-NEXT: srli t1, a5, 48 -; RV64I-NEXT: srli t2, a5, 40 -; RV64I-NEXT: srli t3, a5, 32 -; RV64I-NEXT: srli t4, a5, 24 -; RV64I-NEXT: srli t5, a5, 16 -; RV64I-NEXT: srli t6, a5, 8 -; RV64I-NEXT: or a0, a6, a0 -; RV64I-NEXT: or a1, a4, a1 -; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: srl a6, a6, a1 +; RV64I-NEXT: slli a4, a4, 1 +; RV64I-NEXT: srl a5, a5, a1 +; RV64I-NEXT: slli t1, a0, 1 +; RV64I-NEXT: sra t2, a0, a1 +; RV64I-NEXT: sll a0, t0, a3 +; RV64I-NEXT: sll a1, a4, a3 +; RV64I-NEXT: sll a3, t1, a3 +; RV64I-NEXT: srli a4, t2, 56 +; RV64I-NEXT: srli t0, t2, 48 +; RV64I-NEXT: srli t1, t2, 40 +; RV64I-NEXT: srli t3, t2, 32 +; RV64I-NEXT: srli t4, t2, 24 +; RV64I-NEXT: srli t5, t2, 16 +; RV64I-NEXT: srli t6, t2, 8 +; RV64I-NEXT: or a0, a7, a0 +; RV64I-NEXT: or a1, a6, a1 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: sb t3, 28(a2) -; RV64I-NEXT: sb t2, 29(a2) -; RV64I-NEXT: sb t1, 30(a2) -; RV64I-NEXT: sb t0, 31(a2) -; RV64I-NEXT: sb a5, 24(a2) +; RV64I-NEXT: sb t1, 29(a2) +; RV64I-NEXT: sb t0, 30(a2) +; RV64I-NEXT: sb a4, 31(a2) +; RV64I-NEXT: sb t2, 24(a2) ; RV64I-NEXT: sb t6, 25(a2) ; RV64I-NEXT: sb t5, 26(a2) ; RV64I-NEXT: sb t4, 27(a2) @@ -2316,45 +2438,47 @@ define void @ashr_32bytes(ptr %src.ptr, ptr 
%bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srli s3, a0, 56 ; RV64I-NEXT: srli s4, a0, 48 ; RV64I-NEXT: srli s5, a0, 40 +; RV64I-NEXT: srli s6, a0, 32 ; RV64I-NEXT: sb a7, 20(a2) ; RV64I-NEXT: sb a6, 21(a2) ; RV64I-NEXT: sb a5, 22(a2) ; RV64I-NEXT: sb a4, 23(a2) -; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: srli a4, a0, 24 ; RV64I-NEXT: sb a3, 16(a2) ; RV64I-NEXT: sb t2, 17(a2) ; RV64I-NEXT: sb t1, 18(a2) ; RV64I-NEXT: sb t0, 19(a2) -; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 ; RV64I-NEXT: sb t6, 4(a2) ; RV64I-NEXT: sb t5, 5(a2) ; RV64I-NEXT: sb t4, 6(a2) ; RV64I-NEXT: sb t3, 7(a2) -; RV64I-NEXT: srli a5, a0, 16 +; RV64I-NEXT: srli a5, a0, 8 ; RV64I-NEXT: sb a1, 0(a2) ; RV64I-NEXT: sb s2, 1(a2) ; RV64I-NEXT: sb s1, 2(a2) ; RV64I-NEXT: sb s0, 3(a2) -; RV64I-NEXT: srli a1, a0, 8 -; RV64I-NEXT: sb a4, 12(a2) +; RV64I-NEXT: sb s6, 12(a2) ; RV64I-NEXT: sb s5, 13(a2) ; RV64I-NEXT: sb s4, 14(a2) ; RV64I-NEXT: sb s3, 15(a2) ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb a5, 10(a2) -; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 144 +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: sb a4, 11(a2) +; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 160 ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_32bytes: @@ -2379,148 +2503,159 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a7, 3(a0) ; RV32I-NEXT: lbu a5, 4(a0) ; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t3, 6(a0) -; RV32I-NEXT: lbu t4, 7(a0) -; RV32I-NEXT: lbu t6, 8(a0) -; RV32I-NEXT: lbu s0, 9(a0) -; RV32I-NEXT: lbu s4, 10(a0) -; RV32I-NEXT: lbu s5, 11(a0) -; RV32I-NEXT: lbu s6, 12(a0) -; RV32I-NEXT: lbu s7, 13(a0) -; RV32I-NEXT: lbu s8, 14(a0) -; RV32I-NEXT: lbu s9, 15(a0) -; RV32I-NEXT: lbu s10, 16(a0) -; RV32I-NEXT: lbu s11, 17(a0) -; RV32I-NEXT: lbu s2, 18(a0) -; RV32I-NEXT: lbu s3, 19(a0) +; RV32I-NEXT: lbu t1, 6(a0) +; RV32I-NEXT: lbu t2, 7(a0) +; RV32I-NEXT: lbu t3, 8(a0) +; RV32I-NEXT: lbu t4, 9(a0) +; RV32I-NEXT: lbu t5, 10(a0) +; RV32I-NEXT: lbu t6, 11(a0) +; RV32I-NEXT: lbu s0, 12(a0) +; RV32I-NEXT: lbu s1, 13(a0) +; RV32I-NEXT: lbu s2, 14(a0) +; RV32I-NEXT: lbu s3, 15(a0) +; RV32I-NEXT: lbu s4, 16(a0) +; RV32I-NEXT: lbu s5, 17(a0) +; RV32I-NEXT: lbu s6, 18(a0) +; RV32I-NEXT: lbu s7, 19(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a7, a7, 24 ; RV32I-NEXT: or a3, 
a4, a3 +; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: or a4, a7, a6 -; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 21(a0) -; RV32I-NEXT: lbu t5, 22(a0) -; RV32I-NEXT: lbu s1, 23(a0) +; RV32I-NEXT: lbu s8, 20(a0) +; RV32I-NEXT: lbu s9, 21(a0) +; RV32I-NEXT: lbu s10, 22(a0) +; RV32I-NEXT: lbu s11, 23(a0) ; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: slli s5, s5, 24 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: slli t5, t5, 16 +; RV32I-NEXT: slli t6, t6, 24 ; RV32I-NEXT: or a5, t0, a5 -; RV32I-NEXT: or a6, t4, t3 -; RV32I-NEXT: or a7, s0, t6 -; RV32I-NEXT: or t0, s5, s4 -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu s4, 25(a0) -; RV32I-NEXT: lbu s5, 26(a0) -; RV32I-NEXT: lbu ra, 27(a0) -; RV32I-NEXT: slli s7, s7, 8 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli s9, s9, 24 -; RV32I-NEXT: slli s11, s11, 8 -; RV32I-NEXT: or t4, s7, s6 -; RV32I-NEXT: or t6, s9, s8 -; RV32I-NEXT: or s0, s11, s10 -; RV32I-NEXT: lbu s6, 28(a0) -; RV32I-NEXT: lbu s7, 29(a0) -; RV32I-NEXT: lbu s8, 30(a0) -; RV32I-NEXT: lbu s9, 31(a0) -; RV32I-NEXT: lbu a0, 0(a1) +; RV32I-NEXT: or a6, t2, t1 +; RV32I-NEXT: or a7, t4, t3 +; RV32I-NEXT: or t0, t6, t5 +; RV32I-NEXT: lbu ra, 24(a0) +; RV32I-NEXT: lbu a3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) +; RV32I-NEXT: slli s1, s1, 8 ; RV32I-NEXT: slli s2, s2, 16 ; RV32I-NEXT: slli s3, s3, 24 -; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: addi s3, sp, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s1, s1, 24 -; RV32I-NEXT: slli s4, s4, 8 -; RV32I-NEXT: slli s5, s5, 16 -; RV32I-NEXT: slli ra, ra, 24 -; RV32I-NEXT: slli s7, s7, 8 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli s9, s9, 24 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: srli a1, a0, 3 +; RV32I-NEXT: slli s5, s5, 8 +; RV32I-NEXT: or t1, s1, s0 +; RV32I-NEXT: or t2, s3, s2 +; RV32I-NEXT: or t3, s5, s4 +; RV32I-NEXT: lbu t6, 28(a0) +; RV32I-NEXT: lbu s0, 29(a0) +; RV32I-NEXT: lbu s1, 30(a0) +; RV32I-NEXT: lbu a0, 31(a0) +; RV32I-NEXT: slli s6, s6, 16 +; RV32I-NEXT: slli s7, s7, 24 +; RV32I-NEXT: slli s9, s9, 8 +; RV32I-NEXT: slli s10, s10, 16 +; RV32I-NEXT: slli s11, s11, 24 +; RV32I-NEXT: or s2, s7, s6 +; RV32I-NEXT: or s3, s9, s8 +; RV32I-NEXT: or s4, s11, s10 +; RV32I-NEXT: lbu s5, 0(a1) +; RV32I-NEXT: lbu s6, 1(a1) +; RV32I-NEXT: lbu s7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, ra +; RV32I-NEXT: addi s8, sp, 8 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t5, t5, 24 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: slli s1, s1, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: slli s6, s6, 8 +; RV32I-NEXT: slli s7, s7, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t5, s0, t6 +; RV32I-NEXT: or s1, a0, s1 +; RV32I-NEXT: or t6, s6, s5 +; RV32I-NEXT: or a1, a1, s7 +; RV32I-NEXT: srai s0, a0, 31 +; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a4, a4, a0 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or t0, s2, t3 +; RV32I-NEXT: or t1, s4, s3 +; RV32I-NEXT: or a3, t4, a3 ; RV32I-NEXT: or t2, s1, t5 -; RV32I-NEXT: andi t5, a0, 31 -; RV32I-NEXT: or t3, s4, t3 -; RV32I-NEXT: or s1, ra, s5 -; RV32I-NEXT: or s4, s7, s6 -; RV32I-NEXT: or s5, s9, s8 -; RV32I-NEXT: srai s6, s9, 31 -; RV32I-NEXT: andi s7, a1, 28 
-; RV32I-NEXT: xori a1, t5, 31 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a6, t6, t4 -; RV32I-NEXT: or a7, s2, s0 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or t1, s1, t3 -; RV32I-NEXT: or t2, s5, s4 -; RV32I-NEXT: sw s6, 56(sp) -; RV32I-NEXT: sw s6, 60(sp) -; RV32I-NEXT: sw s6, 64(sp) -; RV32I-NEXT: sw s6, 68(sp) -; RV32I-NEXT: sw s6, 40(sp) -; RV32I-NEXT: sw s6, 44(sp) -; RV32I-NEXT: sw s6, 48(sp) -; RV32I-NEXT: sw s6, 52(sp) -; RV32I-NEXT: add s3, s3, s7 -; RV32I-NEXT: sw a7, 24(sp) -; RV32I-NEXT: sw t0, 28(sp) -; RV32I-NEXT: sw t1, 32(sp) +; RV32I-NEXT: or a0, a1, t6 +; RV32I-NEXT: sw s0, 56(sp) +; RV32I-NEXT: sw s0, 60(sp) +; RV32I-NEXT: sw s0, 64(sp) +; RV32I-NEXT: sw s0, 68(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: sw s0, 44(sp) +; RV32I-NEXT: sw s0, 48(sp) +; RV32I-NEXT: sw s0, 52(sp) +; RV32I-NEXT: sw t0, 24(sp) +; RV32I-NEXT: sw t1, 28(sp) +; RV32I-NEXT: sw a3, 32(sp) ; RV32I-NEXT: sw t2, 36(sp) -; RV32I-NEXT: sw a3, 8(sp) -; RV32I-NEXT: sw a4, 12(sp) -; RV32I-NEXT: sw a5, 16(sp) -; RV32I-NEXT: sw a6, 20(sp) -; RV32I-NEXT: lw a3, 0(s3) -; RV32I-NEXT: lw a4, 4(s3) -; RV32I-NEXT: lw a5, 8(s3) -; RV32I-NEXT: lw a6, 12(s3) -; RV32I-NEXT: lw a7, 16(s3) -; RV32I-NEXT: lw t0, 20(s3) -; RV32I-NEXT: lw t1, 24(s3) -; RV32I-NEXT: lw t2, 28(s3) -; RV32I-NEXT: srl t3, a4, a0 -; RV32I-NEXT: slli t4, a5, 1 +; RV32I-NEXT: sw a4, 8(sp) +; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a6, 16(sp) +; RV32I-NEXT: sw a7, 20(sp) +; RV32I-NEXT: srli a1, a0, 3 +; RV32I-NEXT: andi a3, a0, 31 +; RV32I-NEXT: andi a4, a1, 28 +; RV32I-NEXT: xori a1, a3, 31 +; RV32I-NEXT: add a4, s8, a4 +; RV32I-NEXT: lw a3, 0(a4) +; RV32I-NEXT: lw a5, 4(a4) +; RV32I-NEXT: lw a6, 8(a4) +; RV32I-NEXT: lw a7, 12(a4) +; RV32I-NEXT: lw t0, 16(a4) +; RV32I-NEXT: lw t1, 20(a4) +; RV32I-NEXT: lw t2, 24(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: srl t3, a5, a0 +; RV32I-NEXT: slli t4, a6, 1 ; RV32I-NEXT: srl a3, a3, a0 -; RV32I-NEXT: slli a4, a4, 1 -; RV32I-NEXT: srl t5, a6, a0 -; RV32I-NEXT: slli t6, a7, 1 -; RV32I-NEXT: srl a5, a5, a0 -; RV32I-NEXT: slli a6, a6, 1 -; RV32I-NEXT: srl s0, t0, a0 -; RV32I-NEXT: slli s1, t1, 1 -; RV32I-NEXT: srl a7, a7, a0 -; RV32I-NEXT: slli t0, t0, 1 -; RV32I-NEXT: srl t1, t1, a0 -; RV32I-NEXT: slli s2, t2, 1 -; RV32I-NEXT: sra t2, t2, a0 +; RV32I-NEXT: slli a5, a5, 1 +; RV32I-NEXT: srl t5, a7, a0 +; RV32I-NEXT: slli t6, t0, 1 +; RV32I-NEXT: srl a6, a6, a0 +; RV32I-NEXT: slli a7, a7, 1 +; RV32I-NEXT: srl s0, t1, a0 +; RV32I-NEXT: slli s1, t2, 1 +; RV32I-NEXT: srl t0, t0, a0 +; RV32I-NEXT: slli t1, t1, 1 +; RV32I-NEXT: srl t2, t2, a0 +; RV32I-NEXT: slli s2, a4, 1 +; RV32I-NEXT: sra s3, a4, a0 ; RV32I-NEXT: sll a0, t4, a1 -; RV32I-NEXT: sll a4, a4, a1 -; RV32I-NEXT: sll t4, t6, a1 -; RV32I-NEXT: sll a6, a6, a1 -; RV32I-NEXT: sll t6, s1, a1 -; RV32I-NEXT: sll t0, t0, a1 -; RV32I-NEXT: sll s1, s2, a1 -; RV32I-NEXT: srli s2, t2, 24 -; RV32I-NEXT: srli s3, t2, 16 -; RV32I-NEXT: srli s4, t2, 8 +; RV32I-NEXT: sll a4, a5, a1 +; RV32I-NEXT: sll a5, t6, a1 +; RV32I-NEXT: sll a7, a7, a1 +; RV32I-NEXT: sll t4, s1, a1 +; RV32I-NEXT: sll t1, t1, a1 +; RV32I-NEXT: sll t6, s2, a1 +; RV32I-NEXT: srli s1, s3, 24 +; RV32I-NEXT: srli s2, s3, 16 +; RV32I-NEXT: srli s4, s3, 8 ; RV32I-NEXT: or a0, t3, a0 ; RV32I-NEXT: or a1, a3, a4 -; RV32I-NEXT: or a3, t5, t4 -; RV32I-NEXT: or a4, a5, a6 -; RV32I-NEXT: or a5, s0, t6 -; RV32I-NEXT: or a6, a7, t0 -; RV32I-NEXT: or a7, t1, s1 -; RV32I-NEXT: sb t2, 28(a2) +; RV32I-NEXT: or a3, t5, a5 +; RV32I-NEXT: or a4, 
a6, a7 +; RV32I-NEXT: or a5, s0, t4 +; RV32I-NEXT: or a6, t0, t1 +; RV32I-NEXT: or a7, t2, t6 +; RV32I-NEXT: sb s3, 28(a2) ; RV32I-NEXT: sb s4, 29(a2) -; RV32I-NEXT: sb s3, 30(a2) -; RV32I-NEXT: sb s2, 31(a2) +; RV32I-NEXT: sb s2, 30(a2) +; RV32I-NEXT: sb s1, 31(a2) ; RV32I-NEXT: srli t0, a7, 24 ; RV32I-NEXT: srli t1, a7, 16 ; RV32I-NEXT: srli t2, a7, 8 diff --git a/llvm/test/CodeGen/RISCV/xandesbfhcvt.ll b/llvm/test/CodeGen/RISCV/xandesbfhcvt.ll index 854d0b6..72242f1 100644 --- a/llvm/test/CodeGen/RISCV/xandesbfhcvt.ll +++ b/llvm/test/CodeGen/RISCV/xandesbfhcvt.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+xandesbfhcvt -target-abi ilp32f \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,XANDESBFHCVT %s +; RUN: llc -mtriple=riscv32 -mattr=+zfh,+xandesbfhcvt -target-abi ilp32f \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZFH %s ; RUN: llc -mtriple=riscv64 -mattr=+xandesbfhcvt -target-abi lp64f \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,XANDESBFHCVT %s +; RUN: llc -mtriple=riscv64 -mattr=+zfh,+xandesbfhcvt -target-abi lp64f \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZFH %s define float @fcvt_s_bf16(bfloat %a) nounwind { ; CHECK-LABEL: fcvt_s_bf16: @@ -21,3 +25,40 @@ define bfloat @fcvt_bf16_s(float %a) nounwind { %1 = fptrunc float %a to bfloat ret bfloat %1 } + +; Check load and store to bf16. +define void @loadstorebf16(ptr %bf, ptr %sf) nounwind { +; XANDESBFHCVT-LABEL: loadstorebf16: +; XANDESBFHCVT: # %bb.0: # %entry +; XANDESBFHCVT-NEXT: lhu a2, 0(a0) +; XANDESBFHCVT-NEXT: lui a3, 1048560 +; XANDESBFHCVT-NEXT: or a2, a2, a3 +; XANDESBFHCVT-NEXT: fmv.w.x fa5, a2 +; XANDESBFHCVT-NEXT: nds.fcvt.s.bf16 fa5, fa5 +; XANDESBFHCVT-NEXT: fsw fa5, 0(a1) +; XANDESBFHCVT-NEXT: flw fa5, 0(a1) +; XANDESBFHCVT-NEXT: nds.fcvt.bf16.s fa5, fa5 +; XANDESBFHCVT-NEXT: fmv.x.w a1, fa5 +; XANDESBFHCVT-NEXT: sh a1, 0(a0) +; XANDESBFHCVT-NEXT: ret +; +; ZFH-LABEL: loadstorebf16: +; ZFH: # %bb.0: # %entry +; ZFH-NEXT: flh fa5, 0(a0) +; ZFH-NEXT: nds.fcvt.s.bf16 fa5, fa5 +; ZFH-NEXT: fsw fa5, 0(a1) +; ZFH-NEXT: flw fa5, 0(a1) +; ZFH-NEXT: nds.fcvt.bf16.s fa5, fa5 +; ZFH-NEXT: fsh fa5, 0(a0) +; ZFH-NEXT: ret +entry: + %0 = load bfloat, bfloat* %bf, align 2 + %1 = fpext bfloat %0 to float + store volatile float %1, float* %sf, align 4 + + %2 = load float, float* %sf, align 4 + %3 = fptrunc float %2 to bfloat + store volatile bfloat %3, bfloat* %bf, align 2 + + ret void +} diff --git a/llvm/test/CodeGen/RISCV/xmips-cbop.ll b/llvm/test/CodeGen/RISCV/xmips-cbop.ll index cbbd1de..0d5defc 100644 --- a/llvm/test/CodeGen/RISCV/xmips-cbop.ll +++ b/llvm/test/CodeGen/RISCV/xmips-cbop.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+xmipscbop -mattr=+m -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+xmipscbop -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32XMIPSPREFETCH -; RUN: llc -mtriple=riscv64 -mattr=+xmipscbop -mattr=+m -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+xmipscbop -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64XMIPSPREFETCH define void @prefetch_data_read(ptr noundef %ptr) nounwind { @@ -49,3 +49,54 @@ define void 
@prefetch_inst_read(ptr noundef %ptr) nounwind { tail call void @llvm.prefetch.p0(ptr nonnull %arrayidx, i32 0, i32 0, i32 0) ret void } + +define void @prefetch_frameindex_test_neg() nounwind { +; RV32XMIPSPREFETCH-LABEL: prefetch_frameindex_test_neg: +; RV32XMIPSPREFETCH: # %bb.0: +; RV32XMIPSPREFETCH-NEXT: lui a0, 1 +; RV32XMIPSPREFETCH-NEXT: addi a0, a0, 16 +; RV32XMIPSPREFETCH-NEXT: sub sp, sp, a0 +; RV32XMIPSPREFETCH-NEXT: addi a0, sp, 524 +; RV32XMIPSPREFETCH-NEXT: mips.pref 8, 0(a0) +; RV32XMIPSPREFETCH-NEXT: lui a0, 1 +; RV32XMIPSPREFETCH-NEXT: addi a0, a0, 16 +; RV32XMIPSPREFETCH-NEXT: add sp, sp, a0 +; RV32XMIPSPREFETCH-NEXT: ret +; +; RV64XMIPSPREFETCH-LABEL: prefetch_frameindex_test_neg: +; RV64XMIPSPREFETCH: # %bb.0: +; RV64XMIPSPREFETCH-NEXT: lui a0, 1 +; RV64XMIPSPREFETCH-NEXT: addi a0, a0, 16 +; RV64XMIPSPREFETCH-NEXT: sub sp, sp, a0 +; RV64XMIPSPREFETCH-NEXT: addi a0, sp, 524 +; RV64XMIPSPREFETCH-NEXT: mips.pref 8, 0(a0) +; RV64XMIPSPREFETCH-NEXT: lui a0, 1 +; RV64XMIPSPREFETCH-NEXT: addi a0, a0, 16 +; RV64XMIPSPREFETCH-NEXT: add sp, sp, a0 +; RV64XMIPSPREFETCH-NEXT: ret + %data = alloca [1024 x i32], align 4 + %ptr = getelementptr [1024 x i32], ptr %data, i32 0, i32 127 + call void @llvm.prefetch(ptr %ptr, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_frameindex_test() nounwind { +; RV32XMIPSPREFETCH-LABEL: prefetch_frameindex_test: +; RV32XMIPSPREFETCH: # %bb.0: +; RV32XMIPSPREFETCH-NEXT: addi sp, sp, -512 +; RV32XMIPSPREFETCH-NEXT: mips.pref 8, 32(sp) +; RV32XMIPSPREFETCH-NEXT: addi sp, sp, 512 +; RV32XMIPSPREFETCH-NEXT: ret +; +; RV64XMIPSPREFETCH-LABEL: prefetch_frameindex_test: +; RV64XMIPSPREFETCH: # %bb.0: +; RV64XMIPSPREFETCH-NEXT: addi sp, sp, -512 +; RV64XMIPSPREFETCH-NEXT: mips.pref 8, 32(sp) +; RV64XMIPSPREFETCH-NEXT: addi sp, sp, 512 +; RV64XMIPSPREFETCH-NEXT: ret + %data = alloca [128 x i32], align 4 + %base = bitcast ptr %data to ptr + %ptr = getelementptr [128 x i32], ptr %base, i32 0, i32 8 + call void @llvm.prefetch(ptr %ptr, i32 0, i32 0, i32 1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/xqciac.ll b/llvm/test/CodeGen/RISCV/xqciac.ll index 4c77b39..6fdc63f 100644 --- a/llvm/test/CodeGen/RISCV/xqciac.ll +++ b/llvm/test/CodeGen/RISCV/xqciac.ll @@ -463,3 +463,30 @@ entry: %add = add nsw i32 %shlc1, %shlc2 ret i32 %add } + +define i32 @testmuliaddnegimm(i32 %a) { +; RV32IM-LABEL: testmuliaddnegimm: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 1 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: li a1, 3 +; RV32IM-NEXT: sub a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: testmuliaddnegimm: +; RV32IMXQCIAC: # %bb.0: +; RV32IMXQCIAC-NEXT: li a1, 3 +; RV32IMXQCIAC-NEXT: qc.muliadd a1, a0, -3 +; RV32IMXQCIAC-NEXT: mv a0, a1 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: testmuliaddnegimm: +; RV32IZBAMXQCIAC: # %bb.0: +; RV32IZBAMXQCIAC-NEXT: li a1, 3 +; RV32IZBAMXQCIAC-NEXT: qc.muliadd a1, a0, -3 +; RV32IZBAMXQCIAC-NEXT: mv a0, a1 +; RV32IZBAMXQCIAC-NEXT: ret + %mul = mul i32 %a, -3 + %add = add i32 %mul, 3 + ret i32 %add +} diff --git a/llvm/test/CodeGen/RISCV/xqcisls.ll b/llvm/test/CodeGen/RISCV/xqcisls.ll index 709dc4c..3dea540 100644 --- a/llvm/test/CodeGen/RISCV/xqcisls.ll +++ b/llvm/test/CodeGen/RISCV/xqcisls.ll @@ -308,13 +308,13 @@ define i64 @lrd(ptr %a, i32 %b) { ; ; RV32IZBAXQCISLS-LABEL: lrd: ; RV32IZBAXQCISLS: # %bb.0: -; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a0, a1, 3 -; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4 -; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3 -; RV32IZBAXQCISLS-NEXT: add a0, a2, a2 -; 
RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2 -; RV32IZBAXQCISLS-NEXT: add a1, a1, a1 -; RV32IZBAXQCISLS-NEXT: add a1, a1, a2 +; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0 +; RV32IZBAXQCISLS-NEXT: lw a1, 0(a0) +; RV32IZBAXQCISLS-NEXT: lw a2, 4(a0) +; RV32IZBAXQCISLS-NEXT: add a0, a1, a1 +; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1 +; RV32IZBAXQCISLS-NEXT: add a2, a2, a2 +; RV32IZBAXQCISLS-NEXT: add a1, a2, a1 ; RV32IZBAXQCISLS-NEXT: ret %1 = getelementptr i64, ptr %a, i32 %b %2 = load i64, ptr %1, align 8 @@ -348,14 +348,13 @@ define i64 @lrd_2(ptr %a, i32 %b) { ; ; RV32IZBAXQCISLS-LABEL: lrd_2: ; RV32IZBAXQCISLS: # %bb.0: -; RV32IZBAXQCISLS-NEXT: addi a2, a0, 96 -; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a2, a1, 3 -; RV32IZBAXQCISLS-NEXT: addi a0, a0, 100 -; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3 -; RV32IZBAXQCISLS-NEXT: add a0, a2, a2 -; RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2 -; RV32IZBAXQCISLS-NEXT: add a1, a1, a1 -; RV32IZBAXQCISLS-NEXT: add a1, a1, a2 +; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0 +; RV32IZBAXQCISLS-NEXT: lw a1, 96(a0) +; RV32IZBAXQCISLS-NEXT: lw a2, 100(a0) +; RV32IZBAXQCISLS-NEXT: add a0, a1, a1 +; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1 +; RV32IZBAXQCISLS-NEXT: add a2, a2, a2 +; RV32IZBAXQCISLS-NEXT: add a1, a2, a1 ; RV32IZBAXQCISLS-NEXT: ret %1 = add i32 %b, 12 %2 = getelementptr i64, ptr %a, i32 %1 @@ -472,11 +471,11 @@ define void @srd(ptr %a, i32 %b, i64 %c) { ; RV32IZBAXQCISLS: # %bb.0: ; RV32IZBAXQCISLS-NEXT: add a4, a2, a2 ; RV32IZBAXQCISLS-NEXT: add a3, a3, a3 -; RV32IZBAXQCISLS-NEXT: sltu a2, a4, a2 -; RV32IZBAXQCISLS-NEXT: qc.srw a4, a0, a1, 3 -; RV32IZBAXQCISLS-NEXT: add a2, a3, a2 -; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4 -; RV32IZBAXQCISLS-NEXT: qc.srw a2, a0, a1, 3 +; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0 +; RV32IZBAXQCISLS-NEXT: sltu a1, a4, a2 +; RV32IZBAXQCISLS-NEXT: add a1, a3, a1 +; RV32IZBAXQCISLS-NEXT: sw a4, 0(a0) +; RV32IZBAXQCISLS-NEXT: sw a1, 4(a0) ; RV32IZBAXQCISLS-NEXT: ret %1 = add i64 %c, %c %2 = getelementptr i64, ptr %a, i32 %b @@ -503,10 +502,10 @@ define i64 @lrd_large_shift(ptr %a, i32 %b) { ; ; RV32IZBAXQCISLS-LABEL: lrd_large_shift: ; RV32IZBAXQCISLS: # %bb.0: -; RV32IZBAXQCISLS-NEXT: addi a2, a0, 384 -; RV32IZBAXQCISLS-NEXT: addi a3, a0, 388 -; RV32IZBAXQCISLS-NEXT: qc.lrw a0, a2, a1, 5 -; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a3, a1, 5 +; RV32IZBAXQCISLS-NEXT: slli a1, a1, 5 +; RV32IZBAXQCISLS-NEXT: add a1, a1, a0 +; RV32IZBAXQCISLS-NEXT: lw a0, 384(a1) +; RV32IZBAXQCISLS-NEXT: lw a1, 388(a1) ; RV32IZBAXQCISLS-NEXT: ret %1 = add i32 %b, 12 %2 = shl i32 %1, 2 diff --git a/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll index cdaae23..5724c4f 100644 --- a/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll +++ b/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll @@ -1,33 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadfmemidx -mattr=+m -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMIDX -; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+xtheadfmemidx -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64XTHEADFMEMIDX +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d,+xtheadfmemidx \ +; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV32XTHEADFMEMIDX +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d,+xtheadfmemidx \ +; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV64XTHEADFMEMIDX -define float @flrw(ptr %a, i64 %b) { -; RV32XTHEADMEMIDX-LABEL: flrw: 
-; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: th.flrw fa5, a0, a1, 2 -; RV32XTHEADMEMIDX-NEXT: fadd.s fa0, fa5, fa5 -; RV32XTHEADMEMIDX-NEXT: ret -; -; RV64XTHEADFMEMIDX-LABEL: flrw: -; RV64XTHEADFMEMIDX: # %bb.0: -; RV64XTHEADFMEMIDX-NEXT: th.flrw fa5, a0, a1, 2 -; RV64XTHEADFMEMIDX-NEXT: fadd.s fa0, fa5, fa5 -; RV64XTHEADFMEMIDX-NEXT: ret - %1 = getelementptr float, ptr %a, i64 %b +define float @flrw(ptr %a, iXLen %b) { +; CHECK-LABEL: flrw: +; CHECK: # %bb.0: +; CHECK-NEXT: th.flrw fa5, a0, a1, 2 +; CHECK-NEXT: fadd.s fa0, fa5, fa5 +; CHECK-NEXT: ret + %1 = getelementptr float, ptr %a, iXLen %b %2 = load float, ptr %1, align 4 %3 = fadd float %2, %2 ret float %3 } define float @flurw(ptr %a, i32 %b) { -; RV32XTHEADMEMIDX-LABEL: flurw: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: th.flrw fa5, a0, a1, 2 -; RV32XTHEADMEMIDX-NEXT: fadd.s fa0, fa5, fa5 -; RV32XTHEADMEMIDX-NEXT: ret +; RV32XTHEADFMEMIDX-LABEL: flurw: +; RV32XTHEADFMEMIDX: # %bb.0: +; RV32XTHEADFMEMIDX-NEXT: th.flrw fa5, a0, a1, 2 +; RV32XTHEADFMEMIDX-NEXT: fadd.s fa0, fa5, fa5 +; RV32XTHEADFMEMIDX-NEXT: ret ; ; RV64XTHEADFMEMIDX-LABEL: flurw: ; RV64XTHEADFMEMIDX: # %bb.0: @@ -41,30 +35,24 @@ define float @flurw(ptr %a, i32 %b) { ret float %4 } -define void @fsrw(ptr %a, i64 %b, float %c) { -; RV32XTHEADMEMIDX-LABEL: fsrw: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: fadd.s fa5, fa0, fa0 -; RV32XTHEADMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2 -; RV32XTHEADMEMIDX-NEXT: ret -; -; RV64XTHEADFMEMIDX-LABEL: fsrw: -; RV64XTHEADFMEMIDX: # %bb.0: -; RV64XTHEADFMEMIDX-NEXT: fadd.s fa5, fa0, fa0 -; RV64XTHEADFMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2 -; RV64XTHEADFMEMIDX-NEXT: ret +define void @fsrw(ptr %a, iXLen %b, float %c) { +; CHECK-LABEL: fsrw: +; CHECK: # %bb.0: +; CHECK-NEXT: fadd.s fa5, fa0, fa0 +; CHECK-NEXT: th.fsrw fa5, a0, a1, 2 +; CHECK-NEXT: ret %1 = fadd float %c, %c - %2 = getelementptr float, ptr %a, i64 %b + %2 = getelementptr float, ptr %a, iXLen %b store float %1, ptr %2, align 4 ret void } define void @fsurw(ptr %a, i32 %b, float %c) { -; RV32XTHEADMEMIDX-LABEL: fsurw: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: fadd.s fa5, fa0, fa0 -; RV32XTHEADMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2 -; RV32XTHEADMEMIDX-NEXT: ret +; RV32XTHEADFMEMIDX-LABEL: fsurw: +; RV32XTHEADFMEMIDX: # %bb.0: +; RV32XTHEADFMEMIDX-NEXT: fadd.s fa5, fa0, fa0 +; RV32XTHEADFMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2 +; RV32XTHEADFMEMIDX-NEXT: ret ; ; RV64XTHEADFMEMIDX-LABEL: fsurw: ; RV64XTHEADFMEMIDX: # %bb.0: @@ -78,30 +66,24 @@ define void @fsurw(ptr %a, i32 %b, float %c) { ret void } -define double @flrd(ptr %a, i64 %b) { -; RV32XTHEADMEMIDX-LABEL: flrd: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: th.flrd fa5, a0, a1, 3 -; RV32XTHEADMEMIDX-NEXT: fadd.d fa0, fa5, fa5 -; RV32XTHEADMEMIDX-NEXT: ret -; -; RV64XTHEADFMEMIDX-LABEL: flrd: -; RV64XTHEADFMEMIDX: # %bb.0: -; RV64XTHEADFMEMIDX-NEXT: th.flrd fa5, a0, a1, 3 -; RV64XTHEADFMEMIDX-NEXT: fadd.d fa0, fa5, fa5 -; RV64XTHEADFMEMIDX-NEXT: ret - %1 = getelementptr double, ptr %a, i64 %b +define double @flrd(ptr %a, iXLen %b) { +; CHECK-LABEL: flrd: +; CHECK: # %bb.0: +; CHECK-NEXT: th.flrd fa5, a0, a1, 3 +; CHECK-NEXT: fadd.d fa0, fa5, fa5 +; CHECK-NEXT: ret + %1 = getelementptr double, ptr %a, iXLen %b %2 = load double, ptr %1, align 8 %3 = fadd double %2, %2 ret double %3 } define double @flurd(ptr %a, i32 %b) { -; RV32XTHEADMEMIDX-LABEL: flurd: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: th.flrd fa5, a0, a1, 3 -; RV32XTHEADMEMIDX-NEXT: fadd.d fa0, fa5, 
fa5 -; RV32XTHEADMEMIDX-NEXT: ret +; RV32XTHEADFMEMIDX-LABEL: flurd: +; RV32XTHEADFMEMIDX: # %bb.0: +; RV32XTHEADFMEMIDX-NEXT: th.flrd fa5, a0, a1, 3 +; RV32XTHEADFMEMIDX-NEXT: fadd.d fa0, fa5, fa5 +; RV32XTHEADFMEMIDX-NEXT: ret ; ; RV64XTHEADFMEMIDX-LABEL: flurd: ; RV64XTHEADFMEMIDX: # %bb.0: @@ -115,30 +97,24 @@ define double @flurd(ptr %a, i32 %b) { ret double %4 } -define void @fsrd(ptr %a, i64 %b, double %c) { -; RV32XTHEADMEMIDX-LABEL: fsrd: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: fadd.d fa5, fa0, fa0 -; RV32XTHEADMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3 -; RV32XTHEADMEMIDX-NEXT: ret -; -; RV64XTHEADFMEMIDX-LABEL: fsrd: -; RV64XTHEADFMEMIDX: # %bb.0: -; RV64XTHEADFMEMIDX-NEXT: fadd.d fa5, fa0, fa0 -; RV64XTHEADFMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3 -; RV64XTHEADFMEMIDX-NEXT: ret +define void @fsrd(ptr %a, iXLen %b, double %c) { +; CHECK-LABEL: fsrd: +; CHECK: # %bb.0: +; CHECK-NEXT: fadd.d fa5, fa0, fa0 +; CHECK-NEXT: th.fsrd fa5, a0, a1, 3 +; CHECK-NEXT: ret %1 = fadd double %c, %c - %2 = getelementptr double, ptr %a, i64 %b + %2 = getelementptr double, ptr %a, iXLen %b store double %1, ptr %2, align 8 ret void } define void @fsurd(ptr %a, i32 %b, double %c) { -; RV32XTHEADMEMIDX-LABEL: fsurd: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: fadd.d fa5, fa0, fa0 -; RV32XTHEADMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3 -; RV32XTHEADMEMIDX-NEXT: ret +; RV32XTHEADFMEMIDX-LABEL: fsurd: +; RV32XTHEADFMEMIDX: # %bb.0: +; RV32XTHEADFMEMIDX-NEXT: fadd.d fa5, fa0, fa0 +; RV32XTHEADFMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3 +; RV32XTHEADFMEMIDX-NEXT: ret ; ; RV64XTHEADFMEMIDX-LABEL: fsurd: ; RV64XTHEADFMEMIDX: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll index fc20fcb..9f0f8d9 100644 --- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll +++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll @@ -1,238 +1,156 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMIDX -; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64XTHEADMEMIDX +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d,+xtheadmemidx \ +; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV32XTHEADMEMIDX +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d,+xtheadmemidx \ +; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV64XTHEADMEMIDX define ptr @lbia(ptr %base, ptr %addr.2, i8 %a) { -; RV32XTHEADMEMIDX-LABEL: lbia: -; RV32XTHEADMEMIDX: # %bb.0: -; RV32XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0 -; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2 -; RV32XTHEADMEMIDX-NEXT: sb a2, 0(a1) -; RV32XTHEADMEMIDX-NEXT: ret -; -; RV64XTHEADMEMIDX-LABEL: lbia: -; RV64XTHEADMEMIDX: # %bb.0: -; RV64XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0 -; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2 -; RV64XTHEADMEMIDX-NEXT: sb a2, 0(a1) -; RV64XTHEADMEMIDX-NEXT: ret - %addr = getelementptr i8, ptr %base, i8 0 +; CHECK-LABEL: lbia: +; CHECK: # %bb.0: +; CHECK-NEXT: th.lbia a3, (a0), -1, 0 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: sb a2, 0(a1) +; CHECK-NEXT: ret + %addr = getelementptr i8, ptr %base, iXLen 0 %ld = load i8, ptr %addr - %addr.1 = getelementptr i8, ptr %base, i8 -1 + %addr.1 = getelementptr i8, ptr %base, iXLen -1 %res = add i8 %ld, %a store i8 %res, ptr %addr.2 ret ptr %addr.1 } define ptr @lbib(ptr 
%base, i8 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sb a1, 1(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sb a1, 1(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: lbib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbib a2, (a0), 1, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sb a1, 1(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 1
 %ld = load i8, ptr %addr
- %addr.1 = getelementptr i8, ptr %base, i8 2
+ %addr.1 = getelementptr i8, ptr %base, iXLen 2
 %res = add i8 %ld, %a
 store i8 %res, ptr %addr.1
 ret ptr %addr
 }

-define ptr @lbuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbuia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbuia a4, (a0), -1, 0
-; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbuia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbuia a3, (a0), -1, 0
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 0
+define ptr @lbuia(ptr %base, ptr %addr.2, i32 %a) {
+; CHECK-LABEL: lbuia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbuia a3, (a0), -1, 0
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 0
 %ld = load i8, ptr %addr
- %zext = zext i8 %ld to i64
- %addr.1 = getelementptr i8, ptr %base, i8 -1
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.2
+ %zext = zext i8 %ld to i32
+ %addr.1 = getelementptr i8, ptr %base, iXLen -1
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.2
 ret ptr %addr.1
 }

-define ptr @lbuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lbuib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbuib a4, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbuib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbuib a3, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 1
+define ptr @lbuib(ptr %base, i32 %a, ptr %addr.1) {
+; CHECK-LABEL: lbuib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbuib a3, (a0), 1, 0
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: sw a1, 0(a2)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 1
 %ld = load i8, ptr %addr
- %zext = zext i8 %ld to i64
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.1
+ %zext = zext i8 %ld to i32
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.1
 ret ptr %addr
 }

 define ptr @lhia(ptr %base, ptr %addr.2, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sh a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sh a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 0
+; CHECK-LABEL: lhia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhia a3, (a0), -16, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sh a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 0
 %ld = load i16, ptr %addr
- %addr.1 = getelementptr i16, ptr %base, i16 -16
+ %addr.1 = getelementptr i16, ptr %base, iXLen -16
 %res = add i16 %ld, %a
 store i16 %res, ptr %addr.2
 ret ptr %addr.1
 }

 define ptr @lhib(ptr %base, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhib a2, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sh a1, 2(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhib a2, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sh a1, 2(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 1
+; CHECK-LABEL: lhib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhib a2, (a0), 2, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 1
 %ld = load i16, ptr %addr
- %addr.1 = getelementptr i16, ptr %base, i16 2
+ %addr.1 = getelementptr i16, ptr %base, iXLen 2
 %res = add i16 %ld, %a
 store i16 %res, ptr %addr.1
 ret ptr %addr
 }

-define ptr @lhuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhuia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhuia a4, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhuia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhuia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 0
+define ptr @lhuia(ptr %base, ptr %addr.2, i32 %a) {
+; CHECK-LABEL: lhuia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhuia a3, (a0), -16, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 0
 %ld = load i16, ptr %addr
- %zext = zext i16 %ld to i64
- %addr.1 = getelementptr i16, ptr %base, i16 -16
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.2
+ %zext = zext i16 %ld to i32
+ %addr.1 = getelementptr i16, ptr %base, iXLen -16
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.2
 ret ptr %addr.1
 }

-define ptr @lhuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lhuib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhuib a4, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhuib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhuib a3, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 1
+define ptr @lhuib(ptr %base, i32 %a, ptr %addr.1) {
+; CHECK-LABEL: lhuib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhuib a3, (a0), 2, 0
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: sw a1, 0(a2)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 1
 %ld = load i16, ptr %addr
- %zext = zext i16 %ld to i64
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.1
+ %zext = zext i16 %ld to i32
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.1
 ret ptr %addr
 }

 define ptr @lwia(ptr %base, ptr %addr.2, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 0
+; CHECK-LABEL: lwia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lwia a3, (a0), -16, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %base, iXLen 0
 %ld = load i32, ptr %addr
- %addr.1 = getelementptr i32, ptr %base, i32 -16
+ %addr.1 = getelementptr i32, ptr %base, iXLen -16
 %res = add i32 %ld, %a
 store i32 %res, ptr %addr.2
 ret ptr %addr.1
 }

 define ptr @lwib(ptr %base, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sw a1, 4(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sw a1, 4(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 1
+; CHECK-LABEL: lwib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lwib a2, (a0), 4, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %base, iXLen 1
 %ld = load i32, ptr %addr
- %addr.1 = getelementptr i32, ptr %base, i32 2
+ %addr.1 = getelementptr i32, ptr %base, iXLen 2
 %res = add i32 %ld, %a
 store i32 %res, ptr %addr.1
 ret ptr %addr
@@ -255,10 +173,10 @@ define ptr @lwuia(ptr %base, ptr %addr.2, i64 %a) {
 ; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
 ; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
 ; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 0
+ %addr = getelementptr i32, ptr %base, iXLen 0
 %ld = load i32, ptr %addr
 %zext = zext i32 %ld to i64
- %addr.1 = getelementptr i32, ptr %base, i32 -16
+ %addr.1 = getelementptr i32, ptr %base, iXLen -16
 %res = add i64 %zext, %a
 store i64 %res, ptr %addr.2
 ret ptr %addr.1
@@ -281,7 +199,7 @@ define ptr @lwuib(ptr %base, i64 %a, ptr %addr.1) {
 ; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
 ; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
 ; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 1
+ %addr = getelementptr i32, ptr %base, iXLen 1
 %ld = load i32, ptr %addr
 %zext = zext i32 %ld to i64
 %res = add i64 %zext, %a
@@ -309,9 +227,9 @@ define ptr @ldia(ptr %base, ptr %addr.2, i64 %a) {
 ; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
 ; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
 ; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i64, ptr %base, i64 0
+ %addr = getelementptr i64, ptr %base, iXLen 0
 %ld = load i64, ptr %addr
- %addr.1 = getelementptr i64, ptr %base, i64 -16
+ %addr.1 = getelementptr i64, ptr %base, iXLen -16
 %res = add i64 %ld, %a
 store i64 %res, ptr %addr.2
 ret ptr %addr.1
@@ -336,117 +254,81 @@ define ptr @ldib(ptr %base, i64 %a) {
 ; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
 ; RV64XTHEADMEMIDX-NEXT: sd a1, 8(a0)
 ; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i64, ptr %base, i64 1
+ %addr = getelementptr i64, ptr %base, iXLen 1
 %ld = load i64, ptr %addr
- %addr.1 = getelementptr i64, ptr %base, i64 2
+ %addr.1 = getelementptr i64, ptr %base, iXLen 2
 %res = add i64 %ld, %a
 store i64 %res, ptr %addr.1
 ret ptr %addr
 }

 define ptr @sbia(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: sbia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.sbia a1, (a0), 1, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i8, ptr %base, iXLen 1
 %res = add i8 %a, %b
 store i8 %res, ptr %base
 ret ptr %addr.1
 }

 define ptr @sbib(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: sbib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.sbib a1, (a0), 1, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i8, ptr %base, iXLen 1
 %res = add i8 %a, %b
 store i8 %res, ptr %addr.1
 ret ptr %addr.1
 }

 define ptr @shia(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: shia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i16, ptr %base, i16 -9
+; CHECK-LABEL: shia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.shia a1, (a0), -9, 1
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i16, ptr %base, iXLen -9
 %res = add i16 %a, %b
 store i16 %res, ptr %base
 ret ptr %addr.1
 }

 define ptr @shib(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: shib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i16, ptr %base, i16 1
+; CHECK-LABEL: shib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.shib a1, (a0), 2, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i16, ptr %base, iXLen 1
 %res = add i16 %a, %b
 store i16 %res, ptr %addr.1
 ret ptr %addr.1
 }

 define ptr @swia(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: swia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i32, ptr %base, i32 8
+; CHECK-LABEL: swia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swia a1, (a0), 8, 2
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i32, ptr %base, iXLen 8
 %res = add i32 %a, %b
 store i32 %res, ptr %base
 ret ptr %addr.1
 }

 define ptr @swib(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: swib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i32, ptr %base, i32 -26
+; CHECK-LABEL: swib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swib a1, (a0), -13, 3
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i32, ptr %base, iXLen -26
 %res = add i32 %a, %b
 store i32 %res, ptr %addr.1
 ret ptr %addr.1
@@ -470,7 +352,7 @@ define ptr @sdia(ptr %base, i64 %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
 ; RV64XTHEADMEMIDX-NEXT: th.sdia a1, (a0), 8, 3
 ; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i64, ptr %base, i64 8
+ %addr.1 = getelementptr i64, ptr %base, iXLen 8
 %res = add i64 %a, %b
 store i64 %res, ptr %base
 ret ptr %addr.1
@@ -492,48 +374,33 @@ define ptr @sdib(ptr %base, i64 %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
 ; RV64XTHEADMEMIDX-NEXT: th.sdib a1, (a0), 8, 0
 ; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i64, ptr %base, i64 1
+ %addr.1 = getelementptr i64, ptr %base, iXLen 1
 %res = add i64 %a, %b
 store i64 %res, ptr %addr.1
 ret ptr %addr.1
 }

-define i8 @lrb_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+define i8 @lrb_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrb_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrb a0, a0, a1, 0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
 %2 = load i8, ptr %1, align 1
 ret i8 %2
 }

-define i64 @lrb(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrb:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+define i32 @lrb(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrb:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrb a0, a0, a1, 0
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
 %2 = load i8, ptr %1, align 1
- %3 = sext i8 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = sext i8 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
 }

 define i8 @lurb_anyext(ptr %a, i32 %b) {
@@ -552,15 +419,11 @@ define i8 @lurb_anyext(ptr %a, i32 %b) {
 ret i8 %3
 }

-define i64 @lurb(ptr %a, i32 %b) {
+define i32 @lurb(ptr %a, i32 %b) {
 ; RV32XTHEADMEMIDX-LABEL: lurb:
 ; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV32XTHEADMEMIDX-NEXT: ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: lurb:
@@ -571,37 +434,29 @@ define i64 @lurb(ptr %a, i32 %b) {
 %1 = zext i32 %b to i64
 %2 = getelementptr i8, ptr %a, i64 %1
 %3 = load i8, ptr %2, align 1
- %4 = sext i8 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
-}
-
-define i64 @lrbu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrbu:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrbu:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrbu a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+ %4 = sext i8 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
+}
+
+define i32 @lrbu(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrbu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrbu a0, a0, a1, 0
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
 %2 = load i8, ptr %1, align 1
- %3 = zext i8 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = zext i8 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
 }

-define i64 @lurbu(ptr %a, i32 %b) {
+define i32 @lurbu(ptr %a, i32 %b) {
 ; RV32XTHEADMEMIDX-LABEL: lurbu:
 ; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrbu a0, a0, a1, 0
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV32XTHEADMEMIDX-NEXT: ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: lurbu:
@@ -612,47 +467,32 @@ define i64 @lurbu(ptr %a, i32 %b) {
 %1 = zext i32 %b to i64
 %2 = getelementptr i8, ptr %a, i64 %1
 %3 = load i8, ptr %2, align 1
- %4 = zext i8 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
+ %4 = zext i8 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
 }

-define i16 @lrh_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+define i16 @lrh_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrh_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrh a0, a0, a1, 1
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
 %2 = load i16, ptr %1, align 2
 ret i16 %2
 }

-define i64 @lrh(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrh:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+define i32 @lrh(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrh a0, a0, a1, 1
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
 %2 = load i16, ptr %1, align 2
- %3 = sext i16 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = sext i16 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
 }

 define i16 @lurh_anyext(ptr %a, i32 %b) {
@@ -671,15 +511,11 @@ define i16 @lurh_anyext(ptr %a, i32 %b) {
 ret i16 %3
 }

-define i64 @lurh(ptr %a, i32 %b) {
+define i32 @lurh(ptr %a, i32 %b) {
 ; RV32XTHEADMEMIDX-LABEL: lurh:
 ; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV32XTHEADMEMIDX-NEXT: ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: lurh:
@@ -690,37 +526,29 @@ define i64 @lurh(ptr %a, i32 %b) {
 %1 = zext i32 %b to i64
 %2 = getelementptr i16, ptr %a, i64 %1
 %3 = load i16, ptr %2, align 2
- %4 = sext i16 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
-}
-
-define i64 @lrhu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrhu:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrhu:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrhu a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+ %4 = sext i16 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
+}
+
+define i32 @lrhu(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrhu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrhu a0, a0, a1, 1
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
 %2 = load i16, ptr %1, align 2
- %3 = zext i16 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = zext i16 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
 }

-define i64 @lurhu(ptr %a, i32 %b) {
+define i32 @lurhu(ptr %a, i32 %b) {
 ; RV32XTHEADMEMIDX-LABEL: lurhu:
 ; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrhu a0, a0, a1, 1
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV32XTHEADMEMIDX-NEXT: ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: lurhu:
@@ -731,27 +559,22 @@ define i64 @lurhu(ptr %a, i32 %b) {
 %1 = zext i32 %b to i64
 %2 = getelementptr i16, ptr %a, i64 %1
 %3 = load i16, ptr %2, align 2
- %4 = zext i16 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
+ %4 = zext i16 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
 }

-define i32 @lrw_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+define i32 @lrw_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrw_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrw a0, a0, a1, 2
+; CHECK-NEXT: ret
+ %1 = getelementptr i32, ptr %a, iXLen %b
 %2 = load i32, ptr %1, align 4
 ret i32 %2
 }

-define i64 @lrw(ptr %a, i64 %b) {
+define i64 @lrw(ptr %a, iXLen %b) {
 ; RV32XTHEADMEMIDX-LABEL: lrw:
 ; RV32XTHEADMEMIDX: # %bb.0:
 ; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2
@@ -767,7 +590,7 @@ define i64 @lrw(ptr %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
 ; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+ %1 = getelementptr i32, ptr %a, iXLen %b
 %2 = load i32, ptr %1, align 4
 %3 = sext i32 %2 to i64
 %4 = add i64 %3, %3
@@ -814,7 +637,7 @@ define i64 @lurw(ptr %a, i32 %b) {
 ret i64 %5
 }

-define i64 @lrwu(ptr %a, i64 %b) {
+define i64 @lrwu(ptr %a, iXLen %b) {
 ; RV32XTHEADMEMIDX-LABEL: lrwu:
 ; RV32XTHEADMEMIDX: # %bb.0:
 ; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2
@@ -827,7 +650,7 @@ define i64 @lrwu(ptr %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: th.lrwu a0, a0, a1, 2
 ; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+ %1 = getelementptr i32, ptr %a, iXLen %b
 %2 = load i32, ptr %1, align 4
 %3 = zext i32 %2 to i64
 %4 = add i64 %3, %3
@@ -855,7 +678,7 @@ define i64 @lurwu(ptr %a, i32 %b) {
 ret i64 %5
 }

-define i64 @lrd(ptr %a, i64 %b) {
+define i64 @lrd(ptr %a, iXLen %b) {
 ; RV32XTHEADMEMIDX-LABEL: lrd:
 ; RV32XTHEADMEMIDX: # %bb.0:
 ; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a0, a1, 3
@@ -872,23 +695,23 @@ define i64 @lrd(ptr %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3
 ; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i64, ptr %a, i64 %b
+ %1 = getelementptr i64, ptr %a, iXLen %b
 %2 = load i64, ptr %1, align 8
 %3 = add i64 %2, %2
 ret i64 %3
 }

-define i64 @lrd_2(ptr %a, i64 %b) {
+define i64 @lrd_2(ptr %a, iXLen %b) {
 ; RV32XTHEADMEMIDX-LABEL: lrd_2:
 ; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: addi a2, a0, 96
-; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a2, a1, 3
-; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 100
-; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a0, a2, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a2, a0, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a1
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
+; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0
+; RV32XTHEADMEMIDX-NEXT: lw a1, 96(a0)
+; RV32XTHEADMEMIDX-NEXT: lw a2, 100(a0)
+; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
+; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
+; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
 ; RV32XTHEADMEMIDX-NEXT: ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: lrd_2:
@@ -897,8 +720,8 @@ define i64 @lrd_2(ptr %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3
 ; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
 ; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12
- %2 = getelementptr i64, ptr %a, i64 %1
+ %1 = add iXLen %b, 12
+ %2 = getelementptr i64, ptr %a, iXLen %1
 %3 = load i64, ptr %2, align 8
 %4 = add i64 %3, %3
 ret i64 %4
@@ -928,20 +751,14 @@ define i64 @lurd(ptr %a, i32 %b) {
 ret i64 %4
 }

-define void @srb(ptr %a, i64 %b, i8 %c) {
-; RV32XTHEADMEMIDX-LABEL: srb:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srb a3, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srb:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srb a2, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srb(ptr %a, iXLen %b, i8 %c) {
+; CHECK-LABEL: srb:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srb a2, a0, a1, 0
+; CHECK-NEXT: ret
 %1 = add i8 %c, %c
- %2 = getelementptr i8, ptr %a, i64 %b
+ %2 = getelementptr i8, ptr %a, iXLen %b
 store i8 %1, ptr %2, align 1
 ret void
 }
@@ -965,20 +782,14 @@ define void @surb(ptr %a, i32 %b, i8 %c) {
 ret void
 }

-define void @srh(ptr %a, i64 %b, i16 %c) {
-; RV32XTHEADMEMIDX-LABEL: srh:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srh a3, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srh:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srh a2, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srh(ptr %a, iXLen %b, i16 %c) {
+; CHECK-LABEL: srh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srh a2, a0, a1, 1
+; CHECK-NEXT: ret
 %1 = add i16 %c, %c
- %2 = getelementptr i16, ptr %a, i64 %b
+ %2 = getelementptr i16, ptr %a, iXLen %b
 store i16 %1, ptr %2, align 2
 ret void
 }
@@ -1002,20 +813,14 @@ define void @surh(ptr %a, i32 %b, i16 %c) {
 ret void
 }

-define void @srw(ptr %a, i64 %b, i32 %c) {
-; RV32XTHEADMEMIDX-LABEL: srw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srw a3, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srw:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srw(ptr %a, iXLen %b, i32 %c) {
+; CHECK-LABEL: srw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srw a2, a0, a1, 2
+; CHECK-NEXT: ret
 %1 = add i32 %c, %c
- %2 = getelementptr i32, ptr %a, i64 %b
+ %2 = getelementptr i32, ptr %a, iXLen %b
 store i32 %1, ptr %2, align 4
 ret void
 }
@@ -1039,16 +844,16 @@ define void @surw(ptr %a, i32 %b, i32 %c) {
 ret void
 }

-define void @srd(ptr %a, i64 %b, i64 %c) {
+define void @srd(ptr %a, iXLen %b, i64 %c) {
 ; RV32XTHEADMEMIDX-LABEL: srd:
 ; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a3
-; RV32XTHEADMEMIDX-NEXT: add a4, a4, a4
-; RV32XTHEADMEMIDX-NEXT: sltu a3, a2, a3
-; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a3, a4, a3
+; RV32XTHEADMEMIDX-NEXT: add a4, a2, a2
+; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
+; RV32XTHEADMEMIDX-NEXT: sltu a2, a4, a2
+; RV32XTHEADMEMIDX-NEXT: th.srw a4, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
 ; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 4
-; RV32XTHEADMEMIDX-NEXT: th.srw a3, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 3
 ; RV32XTHEADMEMIDX-NEXT: ret
 ;
 ; RV64XTHEADMEMIDX-LABEL: srd:
@@ -1057,7 +862,7 @@ define void @srd(ptr %a, i64 %b, i64 %c) {
 ; RV64XTHEADMEMIDX-NEXT: th.srd a2, a0, a1, 3
 ; RV64XTHEADMEMIDX-NEXT: ret
 %1 = add i64 %c, %c
- %2 = getelementptr i64, ptr %a, i64 %b
+ %2 = getelementptr i64, ptr %a, iXLen %b
 store i64 %1, ptr %2, align 8
 ret void
 }
@@ -1087,24 +892,18 @@ define void @surd(ptr %a, i32 %b, i64 %c) {
 }

 define ptr @test_simm5(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: test_simm5:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: test_simm5:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2
-; RV64XTHEADMEMIDX-NEXT: ret
+; CHECK-LABEL: test_simm5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swia a1, (a0), -12, 2
+; CHECK-NEXT: ret
 %addr.1 = getelementptr i32, ptr %base, i32 -12
 %res = add i32 %a, %b
 store i32 %res, ptr %base
 ret ptr %addr.1
 }

-define i64 @lrd_large_shift(ptr %a, i64 %b) {
+define i64 @lrd_large_shift(ptr %a, iXLen %b) {
 ; RV32XTHEADMEMIDX-LABEL: lrd_large_shift:
 ; RV32XTHEADMEMIDX: # %bb.0:
 ; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 5
@@ -1119,14 +918,14 @@ define i64 @lrd_large_shift(ptr %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: add a0, a1, a0
 ; RV64XTHEADMEMIDX-NEXT: ld a0, 384(a0)
 ; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12
- %2 = shl i64 %1, 2
- %3 = getelementptr i64, ptr %a, i64 %2
+ %1 = add iXLen %b, 12
+ %2 = shl iXLen %1, 2
+ %3 = getelementptr i64, ptr %a, iXLen %2
 %4 = load i64, ptr %3, align 8
 ret i64 %4
 }

-define i64 @lrd_large_offset(ptr %a, i64 %b) {
+define i64 @lrd_large_offset(ptr %a, iXLen %b) {
 ; RV32XTHEADMEMIDX-LABEL: lrd_large_offset:
 ; RV32XTHEADMEMIDX: # %bb.0:
 ; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
@@ -1145,8 +944,8 @@ define i64 @lrd_large_offset(ptr %a, i64 %b) {
 ; RV64XTHEADMEMIDX-NEXT: add a0, a0, a1
 ; RV64XTHEADMEMIDX-NEXT: ld a0, 1792(a0)
 ; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12000
- %2 = getelementptr i64, ptr %a, i64 %1
+ %1 = add iXLen %b, 12000
+ %2 = getelementptr i64, ptr %a, iXLen %1
 %3 = load i64, ptr %2, align 8
 ret i64 %3
 }
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 09b065a..048ce96 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -117,3 +117,22 @@ entyr:
 store i64 0, ptr @g
 ret void
 }
+
+define void @large_offset(ptr nocapture %p, i64 %d) nounwind {
+; CHECK-LABEL: large_offset:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 4
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ld a2, -384(a0)
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: seqz a1, a2
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: sd a2, -384(a0)
+; CHECK-NEXT: ret
+entry:
+ %add.ptr = getelementptr inbounds i64, ptr %p, i64 2000
+ %a = load i64, ptr %add.ptr, align 8
+ %b = add i64 %a, 1
+ store i64 %b, ptr %add.ptr, align 8
+ ret void
+}