diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/f16-ex2.ll')
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/f16-ex2.ll | 40 |
1 files changed, 34 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/NVPTX/f16-ex2.ll b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
index ee79f9d..af3fe67 100644
--- a/llvm/test/CodeGen/NVPTX/f16-ex2.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
-; RUN: %if ptxas-sm_75 && ptxas-isa-7.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-FP16 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
 target triple = "nvptx64-nvidia-cuda"
 
 declare half @llvm.nvvm.ex2.approx.f16(half)
-declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
+declare <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half>)
+declare bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat)
+declare <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat>)
 
-; CHECK-LABEL: ex2_half
 define half @ex2_half(half %0) {
 ; CHECK-FP16-LABEL: ex2_half(
 ; CHECK-FP16: {
@@ -21,7 +22,6 @@ define half @ex2_half(half %0) {
  ret half %res
 }
 
-; CHECK-LABEL: ex2_2xhalf
 define <2 x half> @ex2_2xhalf(<2 x half> %0) {
 ; CHECK-FP16-LABEL: ex2_2xhalf(
 ; CHECK-FP16: {
@@ -32,6 +32,34 @@ define <2 x half> @ex2_2xhalf(<2 x half> %0) {
 ; CHECK-FP16-NEXT: ex2.approx.f16x2 %r2, %r1;
 ; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
 ; CHECK-FP16-NEXT: ret;
- %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0)
+ %res = call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %0)
  ret <2 x half> %res
 }
+
+define bfloat @ex2_bfloat(bfloat %0) {
+; CHECK-FP16-LABEL: ex2_bfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b16 %rs<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b16 %rs1, [ex2_bfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16 %rs2, %rs1;
+; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2;
+; CHECK-FP16-NEXT: ret;
+ %res = call bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat %0)
+ ret bfloat %res
+}
+
+define <2 x bfloat> @ex2_2xbfloat(<2 x bfloat> %0) {
+; CHECK-FP16-LABEL: ex2_2xbfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b32 %r<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b32 %r1, [ex2_2xbfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16x2 %r2, %r1;
+; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-FP16-NEXT: ret;
+ %res = call <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat> %0)
+ ret <2 x bfloat> %res
+}
