; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
;
; Test conversions between different-sized float elements.
;
; Each function is compiled twice: once for the default CPU (matched by the
; default check prefix) and once for z16 (matched by the VECTOR prefix).

; Test cases where both elements of a v2f64 are converted to f16s.
; Both runs expect two __truncdfhf2 libcalls, one per element, with the two
; results stored as halfwords at offsets 0 and 2 from the saved copy of %r2.
define void @f1(<2 x double> %val, ptr %ptr) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -176
; CHECK-NEXT:    .cfi_def_cfa_offset 336
; CHECK-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; CHECK-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    lgr %r13, %r2
; CHECK-NEXT:    ldr %f8, %f2
; CHECK-NEXT:    brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT:    ler %f9, %f0
; CHECK-NEXT:    ldr %f0, %f8
; CHECK-NEXT:    brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT:    # kill: def $f0h killed $f0h def $f0d
; CHECK-NEXT:    lgdr %r0, %f0
; CHECK-NEXT:    srlg %r0, %r0, 48
; CHECK-NEXT:    sth %r0, 2(%r13)
; CHECK-NEXT:    lgdr %r0, %f9
; CHECK-NEXT:    srlg %r0, %r0, 48
; CHECK-NEXT:    sth %r0, 0(%r13)
; CHECK-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; CHECK-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; CHECK-NEXT:    lmg %r13, %r15, 280(%r15)
; CHECK-NEXT:    br %r14
;
; VECTOR-LABEL: f1:
; VECTOR:       # %bb.0:
; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT:    .cfi_offset %r13, -56
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -184
; VECTOR-NEXT:    .cfi_def_cfa_offset 344
; VECTOR-NEXT:    std %f8, 176(%r15) # 8-byte Spill
; VECTOR-NEXT:    .cfi_offset %f8, -168
; VECTOR-NEXT:    lgr %r13, %r2
; VECTOR-NEXT:    vst %v24, 160(%r15), 3 # 16-byte Spill
; VECTOR-NEXT:    vrepg %v0, %v24, 1
; VECTOR-NEXT:    # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT:    brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT:    ldr %f8, %f0
; VECTOR-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Reload
; VECTOR-NEXT:    # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT:    brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT:    vsteh %v8, 2(%r13), 0
; VECTOR-NEXT:    ld %f8, 176(%r15) # 8-byte Reload
; VECTOR-NEXT:    vsteh %v0, 0(%r13), 0
; VECTOR-NEXT:    lmg %r13, %r15, 288(%r15)
; VECTOR-NEXT:    br %r14
  %res = fptrunc <2 x double> %val to <2 x half>
  store <2 x half> %res, ptr %ptr
  ret void
}

; Test conversion of an f64 in a vector register to an f16.
; Only element 0 is extracted, so both runs expect a single __truncdfhf2
; libcall; the z16 run first copies %v24 into %v0.
define half @f2(<2 x double> %vec) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -160
; CHECK-NEXT:    .cfi_def_cfa_offset 320
; CHECK-NEXT:    brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT:    lmg %r14, %r15, 272(%r15)
; CHECK-NEXT:    br %r14
;
; VECTOR-LABEL: f2:
; VECTOR:       # %bb.0:
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -160
; VECTOR-NEXT:    .cfi_def_cfa_offset 320
; VECTOR-NEXT:    vlr %v0, %v24
; VECTOR-NEXT:    # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT:    brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT:    lmg %r14, %r15, 272(%r15)
; VECTOR-NEXT:    br %r14
  %scalar = extractelement <2 x double> %vec, i32 0
  %ret = fptrunc double %scalar to half
  ret half %ret
}

; Test cases where even elements of a v4f16 are converted to f64s.
; Elements 0 and 2 of the input are selected by the shuffle mask and each is
; widened with its own __extendhfdf2 libcall.  In the z16 run the two f64
; results are combined into %v24 with vmrhg; in the default run they are
; returned in %f0 and %f2.
define <2 x double> @f3(<4 x half> %vec) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -176
; CHECK-NEXT:    .cfi_def_cfa_offset 336
; CHECK-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; CHECK-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    ler %f8, %f4
; CHECK-NEXT:    brasl %r14, __extendhfdf2@PLT
; CHECK-NEXT:    ldr %f9, %f0
; CHECK-NEXT:    ler %f0, %f8
; CHECK-NEXT:    brasl %r14, __extendhfdf2@PLT
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    ldr %f0, %f9
; CHECK-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; CHECK-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
; CHECK-NEXT:    br %r14
;
; VECTOR-LABEL: f3:
; VECTOR:       # %bb.0:
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -192
; VECTOR-NEXT:    .cfi_def_cfa_offset 352
; VECTOR-NEXT:    vreph %v1, %v24, 2
; VECTOR-NEXT:    vlr %v0, %v24
; VECTOR-NEXT:    vst %v1, 176(%r15), 3 # 16-byte Spill
; VECTOR-NEXT:    # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT:    brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT:    # kill: def $f0d killed $f0d def $v0
; VECTOR-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Spill
; VECTOR-NEXT:    vl %v0, 176(%r15), 3 # 16-byte Reload
; VECTOR-NEXT:    # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT:    brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT:    vl %v1, 160(%r15), 3 # 16-byte Reload
; VECTOR-NEXT:    # kill: def $f0d killed $f0d def $v0
; VECTOR-NEXT:    vmrhg %v24, %v1, %v0
; VECTOR-NEXT:    lmg %r14, %r15, 304(%r15)
; VECTOR-NEXT:    br %r14
  ; The mask picks the even lanes (0 and 2), matching the element-2 splat
  ; (vreph ..., 2) and the %f0/%f4 argument registers in the assertions above.
  %shuffle = shufflevector <4 x half> %vec, <4 x half> %vec, <2 x i32> <i32 0, i32 2>
  %res = fpext <2 x half> %shuffle to <2 x double>
  ret <2 x double> %res
}

; Test conversion of an f16 in a vector register to an f32, constant element index.
; Element 0 is extracted and widened, so both runs expect a single
; __extendhfsf2 libcall; the z16 run first copies %v24 into %v0.
define float @f4(<4 x half> %vec) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -160
; CHECK-NEXT:    .cfi_def_cfa_offset 320
; CHECK-NEXT:    brasl %r14, __extendhfsf2@PLT
; CHECK-NEXT:    lmg %r14, %r15, 272(%r15)
; CHECK-NEXT:    br %r14
;
; VECTOR-LABEL: f4:
; VECTOR:       # %bb.0:
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -160
; VECTOR-NEXT:    .cfi_def_cfa_offset 320
; VECTOR-NEXT:    vlr %v0, %v24
; VECTOR-NEXT:    # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    lmg %r14, %r15, 272(%r15)
; VECTOR-NEXT:    br %r14
  %scalar = extractelement <4 x half> %vec, i32 0
  %ret = fpext half %scalar to float
  ret float %ret
}