; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; Test that truncated scalars use the correct vector insert instruction.
; On big-endian targets, concat_vectors should not skip truncates when
; creating scalar_to_vector, as the bytes would be in the wrong position.

; This truncated i16 should use vlvgh (insert halfword), not vlvgf (insert fullword).
; The i32 argument is truncated to i16, reinterpreted as <2 x i8>, and then
; widened to <16 x i8> by the shuffle.
define <16 x i8> @test_concat_trunc_i16(i32 %x) {
; CHECK-LABEL: test_concat_trunc_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlvgh %v24, %r2, 0
; CHECK-NEXT:    br %r14
  %t = trunc i32 %x to i16
  %vec = bitcast i16 %t to <2 x i8>
  ; FIXME(review): the <16 x i32> shuffle-mask constant is missing from this
  ; instruction; restore it from the upstream test before running.
  %result = shufflevector <2 x i8> %vec, <2 x i8> poison, <16 x i32>
  ret <16 x i8> %result
}

; Test with a more complex shuffle pattern, reduced from a Rust bug report.
; An i32 and an i16 are loaded, each is widened to <16 x i8>, and the two
; vectors are combined by a final shuffle. The expected code inserts the i32
; with vlvgf and the i16 with vlvgh before permuting them together.
define fastcc void @test_shuffle_with_trunc() {
; CHECK-LABEL: test_shuffle_with_trunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lh %r1, 0
; CHECK-NEXT:    l %r0, 0
; CHECK-NEXT:    vlvgh %v1, %r1, 0
; CHECK-NEXT:    larl %r1, .LCPI1_0
; CHECK-NEXT:    vl %v2, 0(%r1), 3
; CHECK-NEXT:    vlvgf %v0, %r0, 0
; CHECK-NEXT:    vperm %v0, %v0, %v1, %v2
; CHECK-NEXT:    vst %v0, 0, 3
; CHECK-NEXT:    br %r14
  %1 = load i32, ptr null, align 8
  %2 = load i16, ptr null, align 1
  br label %3

3:
  %4 = bitcast i32 %1 to <4 x i8>
  ; FIXME(review): the <16 x i32> shuffle-mask constants are missing from the
  ; three shufflevector instructions below; restore them from the upstream
  ; test. The CHECK lines above depend on the exact masks, so do not guess.
  %5 = shufflevector <4 x i8> %4, <4 x i8> zeroinitializer, <16 x i32>
  %6 = bitcast i16 %2 to <2 x i8>
  %7 = shufflevector <2 x i8> %6, <2 x i8> zeroinitializer, <16 x i32>
  %8 = shufflevector <16 x i8> %5, <16 x i8> %7, <16 x i32>
  store <16 x i8> %8, ptr null, align 8
  ret void
}