aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/NVPTX/f16-ex2.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/f16-ex2.ll')
-rw-r--r--llvm/test/CodeGen/NVPTX/f16-ex2.ll40
1 files changed, 34 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/NVPTX/f16-ex2.ll b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
index ee79f9d..af3fe67 100644
--- a/llvm/test/CodeGen/NVPTX/f16-ex2.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
@@ -1,12 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
-; RUN: %if ptxas-sm_75 && ptxas-isa-7.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-FP16 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
target triple = "nvptx64-nvidia-cuda"
declare half @llvm.nvvm.ex2.approx.f16(half)
-declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
+declare <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half>)
+declare bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat)
+declare <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat>)
-; CHECK-LABEL: ex2_half
define half @ex2_half(half %0) {
; CHECK-FP16-LABEL: ex2_half(
; CHECK-FP16: {
@@ -21,7 +22,6 @@ define half @ex2_half(half %0) {
ret half %res
}
-; CHECK-LABEL: ex2_2xhalf
define <2 x half> @ex2_2xhalf(<2 x half> %0) {
; CHECK-FP16-LABEL: ex2_2xhalf(
; CHECK-FP16: {
@@ -32,6 +32,34 @@ define <2 x half> @ex2_2xhalf(<2 x half> %0) {
; CHECK-FP16-NEXT: ex2.approx.f16x2 %r2, %r1;
; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-FP16-NEXT: ret;
- %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0)
+ %res = call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %0)
ret <2 x half> %res
}
+
+define bfloat @ex2_bfloat(bfloat %0) {
+; CHECK-FP16-LABEL: ex2_bfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b16 %rs<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b16 %rs1, [ex2_bfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16 %rs2, %rs1;
+; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2;
+; CHECK-FP16-NEXT: ret;
+ %res = call bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat %0)
+ ret bfloat %res
+}
+
+define <2 x bfloat> @ex2_2xbfloat(<2 x bfloat> %0) {
+; CHECK-FP16-LABEL: ex2_2xbfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b32 %r<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b32 %r1, [ex2_2xbfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16x2 %r2, %r1;
+; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-FP16-NEXT: ret;
+ %res = call <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat> %0)
+ ret <2 x bfloat> %res
+}