; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s

;; Patterns that lower to concat_vectors where all incoming operands are the same.
;;
;; Every function takes a single 128-bit vector %data, builds a wider
;; fixed-length vector whose shuffle mask repeats the lane indices of %data
;; (i.e. %data concatenated with itself), and stores the result to %addr.

define void @concat_i8q_256(<16 x i8> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_i8q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <16 x i8> %data, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                                                                        i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
                                                                        i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                                                                        i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  store <32 x i8> %splat, ptr %addr, align 1
  ret void
}

define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_i16q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <8 x i16> %data, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                                                                        i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <16 x i16> %splat, ptr %addr, align 1
  ret void
}

define void @concat_i32q_256(<4 x i32> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_i32q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                                                       i32 0, i32 1, i32 2, i32 3>
  store <8 x i32> %splat, ptr %addr, align 1
  ret void
}

define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_i64q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <2 x i64> %data, <2 x i64> poison, <4 x i32> <i32 0, i32 1,
                                                                       i32 0, i32 1>
  store <4 x i64> %splat, ptr %addr, align 1
  ret void
}

define void @concat_f16q_256(<8 x half> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_f16q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <8 x half> %data, <8 x half> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                                                                          i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <16 x half> %splat, ptr %addr, align 1
  ret void
}

;; Unlike the other 256-bit cases, the expected output here is a plain store
;; pair of q0 rather than a segment splat into z0.
define void @concat_bf16q_256(<8 x bfloat> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_bf16q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp q0, q0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <8 x bfloat> %data, <8 x bfloat> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                                                                              i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <16 x bfloat> %splat, ptr %addr, align 1
  ret void
}

define void @concat_f32q_256(<4 x float> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_f32q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <4 x float> %data, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                                                           i32 0, i32 1, i32 2, i32 3>
  store <8 x float> %splat, ptr %addr, align 1
  ret void
}

define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_f64q_256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <2 x double> %data, <2 x double> poison, <4 x i32> <i32 0, i32 1,
                                                                             i32 0, i32 1>
  store <4 x double> %splat, ptr %addr, align 1
  ret void
}

;; Test a wider vector

;; 512-bit result built from four copies of %data; with attribute set #0 the
;; expected output stores the same z0 twice (two consecutive vector-length
;; slots).
define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 {
; CHECK-LABEL: concat_i32q_512_with_256_vectors:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0, #1, mul vl]
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                                                                        i32 0, i32 1, i32 2, i32 3,
                                                                        i32 0, i32 1, i32 2, i32 3,
                                                                        i32 0, i32 1, i32 2, i32 3>
  store <16 x i32> %splat, ptr %addr, align 1
  ret void
}

;; Same 512-bit result, but with attribute set #1 the expected output needs
;; only a single z0 store.
define void @concat_i32q_512_with_512_vectors(<4 x i32> %data, ptr %addr) #1 {
; CHECK-LABEL: concat_i32q_512_with_512_vectors:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    str z0, [x0]
; CHECK-NEXT:    ret
  %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                                                                        i32 0, i32 1, i32 2, i32 3,
                                                                        i32 0, i32 1, i32 2, i32 3,
                                                                        i32 0, i32 1, i32 2, i32 3>
  store <16 x i32> %splat, ptr %addr, align 1
  ret void
}

;; #0 fixes vscale at 2 and #1 fixes it at 4; both enable SVE and BF16.
attributes #0 = { vscale_range(2,2) "target-features"="+sve,+bf16" }
attributes #1 = { vscale_range(4,4) "target-features"="+sve,+bf16" }