aboutsummaryrefslogtreecommitdiff
path: root/clang/test/CodeGenHLSL/inline-functions.hlsl
blob: 0c7467e2f972ed6ec40f4b047c5936b68b4baf50 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE,OPT_ATTR
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR

// Tests that user functions will always be inlined.
// This includes exported functions and mangled entry point implementation functions.
// The unmangled entry functions must not be alwaysinlined.

#define MAX 100

float nums[MAX];

// Verify that all functions have the alwaysinline attribute
// NOINLINE: Function Attrs: alwaysinline
// NOINLINE: define hidden void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[Attr:\#[0-9]+]]
// NOINLINE: ret void
// Swap the values of Buf at indices ix1 and ix2
void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
  float tmp = Buf[ix1];
  Buf[ix1] = Buf[ix2];
  Buf[ix2] = tmp;
}

// NOINLINE: Function Attrs: alwaysinline
// NOINLINE: define hidden void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[Attr]]
// NOINLINE: ret void
// Inefficiently sort Buf in place
void BubbleSort(unsigned Buf[MAX], unsigned size) {
  bool swapped = true;
  while (swapped) {
    swapped = false;
    for (unsigned i = 1; i < size; i++) {
      if (Buf[i] < Buf[i-1]) {
	swap(Buf, i, i-1);
	swapped = true;
      }
    }
  }
}

// Note ExtAttr is the inlined export set of attribs
// CHECK: Function Attrs: alwaysinline
// CHECK: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) {{[a-z_ ]*}}[[Attr:\#[0-9]+]]
// CHECK: ret i32
// Sort Buf and remove any duplicate values
// returns the number of values left
export
unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
  BubbleSort(Buf, size);
  unsigned insertPt = 0;
  for (unsigned i = 1; i < size; i++) {
    if (Buf[i] == Buf[i-1])
      insertPt++;
    else
      Buf[insertPt] = Buf[i];
  }
  return insertPt;
}


RWBuffer<unsigned> Indices;

// The mangled version of main only remains without inlining
// because it has internal linkage from the start
// Note main functions get the alwaysinline attrib, which Attr reflects
// NOINLINE: Function Attrs: alwaysinline
// NOINLINE: define internal void @_Z4mainj(i32 noundef %GI) [[Attr]]
// NOINLINE: ret void

// The unmangled version is not inlined, EntryAttr reflects that
// OPT_ATTR: Function Attrs: {{.*}}optnone
// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone
// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
// Make sure function calls are inlined when AlwaysInline is run
// This only leaves calls to llvm. intrinsics
// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void

[numthreads(1,1,1)]
[shader("compute")]
void main(unsigned int GI : SV_GroupIndex) {
  unsigned tmpIndices[MAX];
  if (GI > MAX) return;
  for (unsigned i = 1; i < GI; i++)
    tmpIndices[i] = Indices[i];
  RemoveDupes(tmpIndices, GI);
  for (unsigned i = 1; i < GI; i++)
    tmpIndices[i] = Indices[i];
}

// The mangled version of main only remains without inlining
// because it has internal linkage from the start
// Note main functions get the alwaysinline attrib, which Attr reflects
// NOINLINE: Function Attrs: alwaysinline
// NOINLINE: define internal void @_Z6main10v() [[Attr]]
// NOINLINE: ret void

// The unmangled version is not inlined, EntryAttr reflects that
// OPT_ATTR: Function Attrs: {{.*}}optnone
// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone
// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
// Make sure function calls are inlined when AlwaysInline is run
// This only leaves calls to llvm. intrinsics
// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void

[numthreads(1,1,1)]
[shader("compute")]
void main10() {
  main(10);
}

// CHECK: attributes [[Attr]] = {{.*}} alwaysinline
// CHECK: attributes [[EntryAttr]] = {{.*}} noinline