; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v,+zvl1024b < %s | FileCheck %s --check-prefix=RV64-1024
; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v,+zvl2048b < %s | FileCheck %s --check-prefix=RV64-2048

; Codegen test: a fixed-length two-source element interleave, expressed as a
; chain of shufflevectors, must lower to the widening-arithmetic idiom
; (vwaddu.vv + vwmaccu.vx with a -1 multiplier) instead of a gather, at two
; different minimum VLEN settings (zvl1024b and zvl2048b). The 2048-bit run
; uses half the LMUL of the 1024-bit run for the same fixed vector length.
;
; NOTE(review): the shufflevector constant masks below are missing — each
; shufflevector ends at its `<N x i32>` mask type with no mask operand, so
; this file will not parse as written. The masks were presumably lost when
; the file was extracted; they look like they should be widening masks for
; %2/%3 and an even/odd interleave mask for %4 — TODO: restore them from the
; upstream test before running. All other tokens are kept byte-identical.

; Interleave two 128 x i16 vectors into one 256 x i16 result.
; %agg.result = output pointer, %0 = even-element source, %1 = odd-element source.
define void @interleave256(ptr %agg.result, ptr %0, ptr %1) {
; RV64-1024-LABEL: interleave256:
; RV64-1024:       # %bb.0: # %entry
; RV64-1024-NEXT:    li a3, 128
; RV64-1024-NEXT:    vsetvli zero, a3, e16, m2, ta, ma
; RV64-1024-NEXT:    vle16.v v12, (a1)
; RV64-1024-NEXT:    vle16.v v14, (a2)
; RV64-1024-NEXT:    li a1, -1
; RV64-1024-NEXT:    vwaddu.vv v8, v12, v14
; RV64-1024-NEXT:    vwmaccu.vx v8, a1, v14
; RV64-1024-NEXT:    li a1, 256
; RV64-1024-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RV64-1024-NEXT:    vse16.v v8, (a0)
; RV64-1024-NEXT:    ret
;
; RV64-2048-LABEL: interleave256:
; RV64-2048:       # %bb.0: # %entry
; RV64-2048-NEXT:    li a3, 128
; RV64-2048-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
; RV64-2048-NEXT:    vle16.v v10, (a1)
; RV64-2048-NEXT:    vle16.v v11, (a2)
; RV64-2048-NEXT:    li a1, -1
; RV64-2048-NEXT:    vwaddu.vv v8, v10, v11
; RV64-2048-NEXT:    vwmaccu.vx v8, a1, v11
; RV64-2048-NEXT:    li a1, 256
; RV64-2048-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-2048-NEXT:    vse16.v v8, (a0)
; RV64-2048-NEXT:    ret
entry:
  %ve = load <128 x i16>, ptr %0, align 256
  %vo = load <128 x i16>, ptr %1, align 256
  ; Widen each 128-element source into the 256-element shuffle domain,
  ; then interleave the two widened vectors into the final result.
  ; NOTE(review): constant masks missing on the next three lines — see header.
  %2 = shufflevector <128 x i16> %ve, <128 x i16> poison, <256 x i32>
  %3 = shufflevector <128 x i16> %vo, <128 x i16> poison, <256 x i32>
  %4 = shufflevector <256 x i16> %2, <256 x i16> %3, <256 x i32>
  store <256 x i16> %4, ptr %agg.result, align 512
  ret void
}

; Same pattern at double the width: interleave two 256 x i16 vectors into one
; 512 x i16 result, pushing the 1024-bit configuration up to LMUL=8.
define void @interleave512(ptr %agg.result, ptr %0, ptr %1) local_unnamed_addr {
; RV64-1024-LABEL: interleave512:
; RV64-1024:       # %bb.0: # %entry
; RV64-1024-NEXT:    li a3, 256
; RV64-1024-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; RV64-1024-NEXT:    vle16.v v16, (a1)
; RV64-1024-NEXT:    vle16.v v20, (a2)
; RV64-1024-NEXT:    li a1, -1
; RV64-1024-NEXT:    vwaddu.vv v8, v16, v20
; RV64-1024-NEXT:    vwmaccu.vx v8, a1, v20
; RV64-1024-NEXT:    li a1, 512
; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; RV64-1024-NEXT:    vse16.v v8, (a0)
; RV64-1024-NEXT:    ret
;
; RV64-2048-LABEL: interleave512:
; RV64-2048:       # %bb.0: # %entry
; RV64-2048-NEXT:    li a3, 256
; RV64-2048-NEXT:    vsetvli zero, a3, e16, m2, ta, ma
; RV64-2048-NEXT:    vle16.v v12, (a1)
; RV64-2048-NEXT:    vle16.v v14, (a2)
; RV64-2048-NEXT:    li a1, -1
; RV64-2048-NEXT:    vwaddu.vv v8, v12, v14
; RV64-2048-NEXT:    vwmaccu.vx v8, a1, v14
; RV64-2048-NEXT:    li a1, 512
; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RV64-2048-NEXT:    vse16.v v8, (a0)
; RV64-2048-NEXT:    ret
entry:
  %ve = load <256 x i16>, ptr %0, align 512
  %vo = load <256 x i16>, ptr %1, align 512
  ; NOTE(review): constant masks missing on the next three lines — see header.
  %2 = shufflevector <256 x i16> %ve, <256 x i16> poison, <512 x i32>
  %3 = shufflevector <256 x i16> %vo, <256 x i16> poison, <512 x i32>
  %4 = shufflevector <512 x i16> %2, <512 x i16> %3, <512 x i32>
  store <512 x i16> %4, ptr %agg.result, align 1024
  ret void
}