diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll b/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll new file mode 100644 index 0000000..0bebb58 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s + +; Make sure that the 16-bit constant 0x3c00 isn't folded as 0 into +; v_bfi_b32. +define i32 @mov16_bfi_fold_regression(half %arg, i32 %arg1) { +; CHECK-LABEL: bfi_fold_regression: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b16_e32 v2.l, 0x3c00 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0 +; CHECK-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_pack_b32_f16 v0, v0.l, 0 +; CHECK-NEXT: s_setpc_b64 s[30:31] +bb: + %cmp = icmp eq i32 %arg1, 0 + %call = call half @llvm.copysign.f16(half 0xH3C00, half %arg) + %select = select i1 %cmp, half 0xH3C00, half %call + %insertelement = insertelement <2 x half> zeroinitializer, half %select, i64 0 + %bitcast = bitcast <2 x half> %insertelement to i32 + ret i32 %bitcast +} + +declare half @llvm.copysign.f16(half, half) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
