llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sr.pk.bf16.ll


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s

; FIXME: GlobalISel does not work with bf16, so the -global-isel=1 invocation
; above is disabled (spelled "xUN") until that is fixed.

; Intrinsic under test: takes two float sources plus an i32 third operand and
; returns a <2 x bfloat> vector.
; NOTE(review): "sr" presumably stands for stochastic rounding, with the i32
; operand supplying the rounding bits -- confirm against the AMDGPU docs.
declare <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float, float, i32) #0

; All three operands are plain (non-inreg) arguments; the checks expect them in
; v0-v2 and a single conversion instruction with no extra copies.
define amdgpu_ps float @cvt_sr_pk_bf16_f32_vvv(float %src0, float %src1, i32 %src2) #1 {
; GCN-LABEL: cvt_sr_pk_bf16_f32_vvv:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_cvt_sr_pk_bf16_f32 v0, v0, v1, v2
; GCN-NEXT:    ; return to shader part epilog
  %packed = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 %src2) #0
  ; Reinterpret the packed bfloat pair as one 32-bit float so it can be returned.
  %bits = bitcast <2 x bfloat> %packed to float
  ret float %bits
}

; All three operands are inreg arguments (s0-s2 in the checks). The expected
; output first copies the third operand (s2) into a VGPR and then feeds s0, s1
; and that VGPR to the conversion.
define amdgpu_ps float @cvt_sr_pk_bf16_f32_sss(float inreg %src0, float inreg %src1, i32 inreg %src2) #1 {
; GCN-LABEL: cvt_sr_pk_bf16_f32_sss:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT:    v_cvt_sr_pk_bf16_f32 v0, s0, s1, v0
; GCN-NEXT:    ; return to shader part epilog
  %packed = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 %src2) #0
  ; Reinterpret the packed bfloat pair as one 32-bit float so it can be returned.
  %bits = bitcast <2 x bfloat> %packed to float
  ret float %bits
}

; Two non-inreg float sources with a constant third operand. 65538 is 0x10002,
; which the checks expect to appear directly as a literal operand.
define amdgpu_ps float @cvt_sr_pk_bf16_f32_vvi(float %src0, float %src1) #1 {
; GCN-LABEL: cvt_sr_pk_bf16_f32_vvi:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_cvt_sr_pk_bf16_f32 v0, v0, v1, 0x10002
; GCN-NEXT:    ; return to shader part epilog
  %packed = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 65538) #0
  ; Reinterpret the packed bfloat pair as one 32-bit float so it can be returned.
  %bits = bitcast <2 x bfloat> %packed to float
  ret float %bits
}

; Source-modifier coverage: the fneg on src0 and the fabs on src1 are expected
; to fold into the conversion as -v0 and |v1| rather than being emitted as
; separate instructions. The third operand is the constant 1.
define amdgpu_ps float @cvt_sr_pk_bf16_f32_vvi_mods(float %src0, float %src1) #1 {
; GCN-LABEL: cvt_sr_pk_bf16_f32_vvi_mods:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_cvt_sr_pk_bf16_f32 v0, -v0, |v1|, 1
; GCN-NEXT:    ; return to shader part epilog
  %neg0 = fneg float %src0
  %abs1 = call float @llvm.fabs.f32(float %src1) #0
  %packed = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %neg0, float %abs1, i32 1) #0
  ; Reinterpret the packed bfloat pair as one 32-bit float so it can be returned.
  %bits = bitcast <2 x bfloat> %packed to float
  ret float %bits
}

; Two inreg float sources (s0, s1 in the checks) with the constant 1 as the
; third operand; the expected output is a single instruction with no copies.
define amdgpu_ps float @cvt_sr_pk_bf16_f32_ssi(float inreg %src0, float inreg %src1) #1 {
; GCN-LABEL: cvt_sr_pk_bf16_f32_ssi:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_cvt_sr_pk_bf16_f32 v0, s0, s1, 1
; GCN-NEXT:    ; return to shader part epilog
  %packed = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 1) #0
  ; Reinterpret the packed bfloat pair as one 32-bit float so it can be returned.
  %bits = bitcast <2 x bfloat> %packed to float
  ret float %bits
}

; Used by cvt_sr_pk_bf16_f32_vvi_mods to build an absolute-value source operand.
declare float @llvm.fabs.f32(float) #0

; #0 is attached to the intrinsic declarations and their call sites;
; #1 is attached to the test functions themselves.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }