; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+m,+zbb \
; RUN:   -verify-machineinstrs < %s | \
; RUN:   FileCheck --check-prefixes=CHECK,RV32 %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+m,+zbb \
; RUN:   -verify-machineinstrs < %s | \
; RUN:   FileCheck --check-prefixes=CHECK,RV64 %s

; Argument and return value passing for small fixed-length integer vectors
; when the experimental P extension is enabled. Each test is compiled for
; both riscv32 and riscv64. Assertions shared by both targets use the common
; prefix, the rest use the per-target prefixes named in the commands above.
;
; The file has three groups
;   - test_cc_*      vectors passed in and returned from a leaf function
;   - test_call_*    vectors forwarded to an external callee
;   - test_exhaust*  vectors that follow enough scalar arguments that, on
;                    riscv32, they are read partly or wholly from the stack

; Both <4 x i8> operands and the result are expected in a single register
; each (a0, a1 -> a0) on both targets.
define <4 x i8> @test_cc_v4i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_cc_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    padd.b a0, a0, a1
; CHECK-NEXT:    ret
  %res = add <4 x i8> %a, %b
  ret <4 x i8> %res
}

; Same register usage as the <4 x i8> case, with a halfword packed add.
define <2 x i16> @test_cc_v2i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: test_cc_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    padd.h a0, a0, a1
; CHECK-NEXT:    ret
  %res = add <2 x i16> %a, %b
  ret <2 x i16> %res
}

; 64-bit vector. The riscv32 output operates on two registers per operand
; (a0/a1 and a2/a3), the riscv64 output on one register per operand.
define <8 x i8> @test_cc_v8i8(<8 x i8> %a, <8 x i8> %b) {
; RV32-LABEL: test_cc_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    padd.b a0, a0, a2
; RV32-NEXT:    padd.b a1, a1, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: test_cc_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.b a0, a0, a1
; RV64-NEXT:    ret
  %res = add <8 x i8> %a, %b
  ret <8 x i8> %res
}

; Halfword variant of the 64-bit vector case above.
define <4 x i16> @test_cc_v4i16(<4 x i16> %a, <4 x i16> %b) {
; RV32-LABEL: test_cc_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    padd.h a0, a0, a2
; RV32-NEXT:    padd.h a1, a1, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: test_cc_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.h a0, a0, a1
; RV64-NEXT:    ret
  %res = add <4 x i16> %a, %b
  ret <4 x i16> %res
}

; Word elements. The riscv32 output uses two plain scalar adds, one per
; element, while the riscv64 output uses a single packed word add.
define <2 x i32> @test_cc_v2i32(<2 x i32> %a, <2 x i32> %b) {
; RV32-LABEL: test_cc_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a1, a1, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: test_cc_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.w a0, a0, a1
; RV64-NEXT:    ret
  %res = add <2 x i32> %a, %b
  ret <2 x i32> %res
}

; Indirect on RV32, two registers on RV64
; In the riscv32 output a1 and a2 hold the addresses of the two operands and
; a0 holds the address the result is stored to.
define <16 x i8> @test_cc_v16i8(<16 x i8> %a, <16 x i8> %b) {
; RV32-LABEL: test_cc_v16i8:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a3, 0(a2)
; RV32-NEXT:    lw a4, 4(a2)
; RV32-NEXT:    lw a5, 8(a2)
; RV32-NEXT:    lw a2, 12(a2)
; RV32-NEXT:    lw a6, 0(a1)
; RV32-NEXT:    lw a7, 4(a1)
; RV32-NEXT:    lw t0, 8(a1)
; RV32-NEXT:    lw a1, 12(a1)
; RV32-NEXT:    padd.b a3, a6, a3
; RV32-NEXT:    padd.b a4, a7, a4
; RV32-NEXT:    padd.b a5, t0, a5
; RV32-NEXT:    padd.b a1, a1, a2
; RV32-NEXT:    sw a3, 0(a0)
; RV32-NEXT:    sw a4, 4(a0)
; RV32-NEXT:    sw a5, 8(a0)
; RV32-NEXT:    sw a1, 12(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: test_cc_v16i8:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.b a0, a0, a2
; RV64-NEXT:    padd.b a1, a1, a3
; RV64-NEXT:    ret
  %res = add <16 x i8> %a, %b
  ret <16 x i8> %res
}

; Halfword variant of the 128-bit vector case above.
define <8 x i16> @test_cc_v8i16(<8 x i16> %a, <8 x i16> %b) {
; RV32-LABEL: test_cc_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a3, 0(a2)
; RV32-NEXT:    lw a4, 4(a2)
; RV32-NEXT:    lw a5, 8(a2)
; RV32-NEXT:    lw a2, 12(a2)
; RV32-NEXT:    lw a6, 0(a1)
; RV32-NEXT:    lw a7, 4(a1)
; RV32-NEXT:    lw t0, 8(a1)
; RV32-NEXT:    lw a1, 12(a1)
; RV32-NEXT:    padd.h a3, a6, a3
; RV32-NEXT:    padd.h a4, a7, a4
; RV32-NEXT:    padd.h a5, t0, a5
; RV32-NEXT:    padd.h a1, a1, a2
; RV32-NEXT:    sw a3, 0(a0)
; RV32-NEXT:    sw a4, 4(a0)
; RV32-NEXT:    sw a5, 8(a0)
; RV32-NEXT:    sw a1, 12(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: test_cc_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.h a0, a0, a2
; RV64-NEXT:    padd.h a1, a1, a3
; RV64-NEXT:    ret
  %res = add <8 x i16> %a, %b
  ret <8 x i16> %res
}

; Word variant of the 128-bit vector case. As with <2 x i32>, the riscv32
; output uses scalar adds and the riscv64 output uses packed word adds.
define <4 x i32> @test_cc_v4i32(<4 x i32> %a, <4 x i32> %b) {
; RV32-LABEL: test_cc_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a3, 0(a2)
; RV32-NEXT:    lw a4, 4(a2)
; RV32-NEXT:    lw a5, 8(a2)
; RV32-NEXT:    lw a2, 12(a2)
; RV32-NEXT:    lw a6, 0(a1)
; RV32-NEXT:    lw a7, 4(a1)
; RV32-NEXT:    lw t0, 8(a1)
; RV32-NEXT:    lw a1, 12(a1)
; RV32-NEXT:    add a3, a6, a3
; RV32-NEXT:    add a4, a7, a4
; RV32-NEXT:    add a5, t0, a5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    sw a3, 0(a0)
; RV32-NEXT:    sw a4, 4(a0)
; RV32-NEXT:    sw a5, 8(a0)
; RV32-NEXT:    sw a1, 12(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: test_cc_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.w a0, a0, a2
; RV64-NEXT:    padd.w a1, a1, a3
; RV64-NEXT:    ret
  %res = add <4 x i32> %a, %b
  ret <4 x i32> %res
}

; Function call tests
; Every caller below forwards its arguments in swapped order (%b, %a), so the
; expected output has to move each operand into the other operand's location
; before the call.
declare <4 x i8> @external_v4i8(<4 x i8>, <4 x i8>)

; Single-register operands. a0 and a1 are exchanged through a2 on both
; targets.
define <4 x i8> @test_call_v4i8(<4 x i8> %a, <4 x i8> %b) {
; RV32-LABEL: test_call_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    call external_v4i8
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: test_call_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    mv a2, a0
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    call external_v4i8
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %res = call <4 x i8> @external_v4i8(<4 x i8> %b, <4 x i8> %a)
  ret <4 x i8> %res
}

; Test calling a function with v8i8 arguments (split on RV32)
; In the riscv32 output the a0/a1 and a2/a3 pairs are exchanged.
; NOTE(review) the padd.dw with a zero source operand looks like a
; register-pair copy of a2/a3 into a0/a1 - confirm against the P extension
; specification.
declare <8 x i8> @external_v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_call_v8i8(<8 x i8> %a, <8 x i8> %b) {
; RV32-LABEL: test_call_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    mv a4, a1
; RV32-NEXT:    mv a5, a0
; RV32-NEXT:    padd.dw a0, a2, zero
; RV32-NEXT:    mv a2, a5
; RV32-NEXT:    mv a3, a4
; RV32-NEXT:    call external_v8i8
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: test_call_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    mv a2, a0
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    call external_v8i8
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %res = call <8 x i8> @external_v8i8(<8 x i8> %b, <8 x i8> %a)
  ret <8 x i8> %res
}

; Test calling a function with v16i8 arguments (passed by reference on RV32)
; In the riscv32 output both operands are copied into stack slots (sp+0 and
; sp+16) whose addresses are passed in a2 and a1, and a0 is pointed at a
; third slot (sp+32) for the callee's result. After the call that result is
; copied to the address the caller itself received in a0, which is kept in s0
; across the call.
declare <16 x i8> @external_v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_call_v16i8(<16 x i8> %a, <16 x i8> %b) {
; RV32-LABEL: test_call_v16i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    lw a3, 0(a2)
; RV32-NEXT:    lw a4, 4(a2)
; RV32-NEXT:    lw a5, 8(a2)
; RV32-NEXT:    lw a6, 12(a2)
; RV32-NEXT:    lw a2, 0(a1)
; RV32-NEXT:    lw a7, 4(a1)
; RV32-NEXT:    lw t0, 8(a1)
; RV32-NEXT:    lw a1, 12(a1)
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    sw a2, 0(sp)
; RV32-NEXT:    sw a7, 4(sp)
; RV32-NEXT:    sw t0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 32
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    mv a2, sp
; RV32-NEXT:    sw a3, 16(sp)
; RV32-NEXT:    sw a4, 20(sp)
; RV32-NEXT:    sw a5, 24(sp)
; RV32-NEXT:    sw a6, 28(sp)
; RV32-NEXT:    call external_v16i8
; RV32-NEXT:    lw a0, 32(sp)
; RV32-NEXT:    lw a1, 36(sp)
; RV32-NEXT:    lw a2, 40(sp)
; RV32-NEXT:    lw a3, 44(sp)
; RV32-NEXT:    sw a0, 0(s0)
; RV32-NEXT:    sw a1, 4(s0)
; RV32-NEXT:    sw a2, 8(s0)
; RV32-NEXT:    sw a3, 12(s0)
; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: test_call_v16i8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    mv a4, a1
; RV64-NEXT:    mv a5, a0
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    mv a1, a3
; RV64-NEXT:    mv a2, a5
; RV64-NEXT:    mv a3, a4
; RV64-NEXT:    call external_v16i8
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %res = call <16 x i8> @external_v16i8(<16 x i8> %b, <16 x i8> %a)
  ret <16 x i8> %res
}

; Register exhaustion tests. The leading scalar dummy arguments are unused,
; presumably they exist only to consume argument registers ahead of %b.
;
; Four i64 dummies precede a 32-bit vector. The riscv32 output loads %b from
; 0(sp), the riscv64 output reads it from a4.
define <2 x i16> @test_exhaust(i64 %dummy, i64 %dummy2, i64 %dummy3, i64 %dummy4, <2 x i16> %b) {
; RV32-LABEL: test_exhaust:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a0, 0(sp)
; RV32-NEXT:    padd.h a0, a0, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: test_exhaust:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.h a0, a4, a4
; RV64-NEXT:    ret
  %res = add <2 x i16> %b, %b
  ret <2 x i16> %res
}

; Three i64 dummies and one i32 dummy precede a 64-bit vector. In the riscv32
; output one half of %b is read from a7 and the other half is loaded from
; 0(sp). The riscv64 output reads all of %b from a4.
define <4 x i16> @test_exhaust_2xlen_rv32(i64 %dummy, i64 %dummy2, i64 %dummy3, i32 %dummy4, <4 x i16> %b) {
; RV32-LABEL: test_exhaust_2xlen_rv32:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(sp)
; RV32-NEXT:    padd.h a0, a7, a7
; RV32-NEXT:    padd.h a1, a1, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: test_exhaust_2xlen_rv32:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.h a0, a4, a4
; RV64-NEXT:    ret
  %res = add <4 x i16> %b, %b
  ret <4 x i16> %res
}

; Four i64 dummies precede a 64-bit vector. In the riscv32 output both halves
; of %b are loaded from the stack (0(sp) and 4(sp)). The riscv64 output reads
; %b from a4.
define <4 x i16> @test_exhaust_2xlen_rv32_2(i64 %dummy, i64 %dummy2, i64 %dummy3, i64 %dummy4, <4 x i16> %b) {
; RV32-LABEL: test_exhaust_2xlen_rv32_2:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a0, 0(sp)
; RV32-NEXT:    lw a1, 4(sp)
; RV32-NEXT:    padd.h a0, a0, a0
; RV32-NEXT:    padd.h a1, a1, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: test_exhaust_2xlen_rv32_2:
; RV64:       # %bb.0:
; RV64-NEXT:    padd.h a0, a4, a4
; RV64-NEXT:    ret
  %res = add <4 x i16> %b, %b
  ret <4 x i16> %res
}