aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/inline-attr.ll
blob: 1e56f6f8e2c416ffe7f82e94ff97ef6e1b36de51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-unsafe-fp-math %s  | FileCheck --check-prefixes=GCN,UNSAFE %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck --check-prefixes=GCN,NONANS %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck --check-prefixes=GCN,NOINFS %s

declare void @extern() #0

define float @foo(float %x) #0 {
; GCN-LABEL: define float @foo(
; GCN-SAME: float [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
; GCN-NEXT:  [[ENTRY:.*:]]
; GCN-NEXT:    tail call void @extern()
; GCN-NEXT:    [[MUL:%.*]] = fmul float [[X]], 1.500000e+01
; GCN-NEXT:    ret float [[MUL]]
;
entry:
  call void @extern()
  %mul = fmul float %x, 1.500000e+01
  ret float %mul
}

define amdgpu_kernel void @caller(ptr addrspace(1) %p) #1 {
; GCN-LABEL: define amdgpu_kernel void @caller(
; GCN-SAME: ptr addrspace(1) captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
; GCN-NEXT:  [[ENTRY:.*:]]
; GCN-NEXT:    [[LOAD:%.*]] = load float, ptr addrspace(1) [[P]], align 4, !amdgpu.noclobber [[META0:![0-9]+]]
; GCN-NEXT:    tail call void @extern()
; GCN-NEXT:    [[MUL_I:%.*]] = fmul float [[LOAD]], 1.500000e+01
; GCN-NEXT:    store float [[MUL_I]], ptr addrspace(1) [[P]], align 4
; GCN-NEXT:    ret void
;
entry:
  %load = load float, ptr addrspace(1) %p, align 4
  %call = call fast float @foo(float %load)
  store float %call, ptr addrspace(1) %p, align 4
  ret void
}

attributes #0 = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true"}
attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }

;.
; UNSAFE: attributes #[[ATTR0:[0-9]+]] = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
; UNSAFE: attributes #[[ATTR1]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
; UNSAFE: attributes #[[ATTR2]] = { nounwind "amdgpu-waves-per-eu"="4,10" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
;.
; NONANS: attributes #[[ATTR0:[0-9]+]] = { nounwind "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
; NONANS: attributes #[[ATTR1]] = { nounwind "amdgpu-waves-per-eu"="4,10" "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
; NONANS: attributes #[[ATTR2]] = { nounwind "amdgpu-waves-per-eu"="4,10" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
;.
; NOINFS: attributes #[[ATTR0:[0-9]+]] = { nounwind "no-infs-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
; NOINFS: attributes #[[ATTR1]] = { nounwind "amdgpu-waves-per-eu"="4,10" "no-infs-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
; NOINFS: attributes #[[ATTR2]] = { nounwind "amdgpu-waves-per-eu"="4,10" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
;.
; UNSAFE: [[META0]] = !{}
;.
; NONANS: [[META0]] = !{}
;.
; NOINFS: [[META0]] = !{}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; NOINFS: {{.*}}
; NONANS: {{.*}}
; UNSAFE: {{.*}}