aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/DirectX
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/DirectX')
-rw-r--r--llvm/test/CodeGen/DirectX/Binding/binding-overlap-7.ll35
-rw-r--r--llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll47
-rw-r--r--llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll51
-rw-r--r--llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll8
-rw-r--r--llvm/test/CodeGen/DirectX/dot2add.ll4
-rw-r--r--llvm/test/CodeGen/DirectX/dot2add_error.ll15
-rw-r--r--llvm/test/CodeGen/DirectX/finalize_linkage.ll24
-rw-r--r--llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll (renamed from llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll)14
-rw-r--r--llvm/test/CodeGen/DirectX/imad.ll102
-rw-r--r--llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll4
-rw-r--r--llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll8
-rw-r--r--llvm/test/CodeGen/DirectX/legalize-memset.ll26
-rw-r--r--llvm/test/CodeGen/DirectX/llc-pipeline.ll12
-rw-r--r--llvm/test/CodeGen/DirectX/rootsignature-validation-fail-cbuffer-range.ll15
-rw-r--r--llvm/test/CodeGen/DirectX/rootsignature-validation-fail-descriptor-table-range.ll16
-rw-r--r--llvm/test/CodeGen/DirectX/rootsignature-validation-fail-root-descriptor-range.ll15
-rw-r--r--llvm/test/CodeGen/DirectX/rootsignature-validation-fail-sampler.ll15
-rw-r--r--llvm/test/CodeGen/DirectX/rootsignature-validation-fail-static-sampler-range.ll14
-rw-r--r--llvm/test/CodeGen/DirectX/rootsignature-validation.ll20
-rw-r--r--llvm/test/CodeGen/DirectX/scalar-data.ll2
-rw-r--r--llvm/test/CodeGen/DirectX/umad.ll102
21 files changed, 454 insertions, 95 deletions
diff --git a/llvm/test/CodeGen/DirectX/Binding/binding-overlap-7.ll b/llvm/test/CodeGen/DirectX/Binding/binding-overlap-7.ll
new file mode 100644
index 0000000..25f81dd
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/Binding/binding-overlap-7.ll
@@ -0,0 +1,35 @@
+; Use llc for this test so that we don't abort after the first error.
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check that there is no overlap with unbounded array in different space
+
+ ; Buffer<double> A[2] : register(t2, space4);
+ ; Buffer<double> B : register(t20, space5); // does not overlap
+ ; Buffer<double> C[] : register(t2, space4); // overlaps with A
+
+; CHECK: error: resource A at register 2 overlaps with resource C at register 2 in space 4
+; CHECK-NOT: error: resource C at register 2 overlaps with resource B at register 20 in space 5
+
+target triple = "dxil-pc-shadermodel6.3-library"
+
+@A.str = private unnamed_addr constant [2 x i8] c"A\00", align 1
+@B.str = private unnamed_addr constant [2 x i8] c"B\00", align 1
+@C.str = private unnamed_addr constant [2 x i8] c"C\00", align 1
+
+define void @test_not_overlapping_in_different_spaces() {
+entry:
+
+ ; Buffer<double> A[2] : register(t2, space4);
+ %h0 = call target("dx.TypedBuffer", double, 0, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 2, i32 10, i1 false, ptr @A.str)
+
+ ; Buffer<double> B : register(t20, space5);
+ %h1 = call target("dx.TypedBuffer", i64, 0, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 5, i32 20, i32 1, i32 0, i1 false, ptr @B.str)
+
+ ; Buffer<double> C[] : register(t2, space4);
+ %h2 = call target("dx.TypedBuffer", double, 0, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 10, i1 false, ptr @C.str)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll
index abab5c9..86d69ab 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
-; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
+; RUN: llc %s --filetype=asm -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.6-compute"
@@ -14,20 +14,22 @@ target triple = "dxil-pc-shadermodel6.6-compute"
@Six.str = private unnamed_addr constant [4 x i8] c"Six\00", align 1
@Seven.str = private unnamed_addr constant [6 x i8] c"Seven\00", align 1
@Array.str = private unnamed_addr constant [6 x i8] c"Array\00", align 1
+@Array2.str = private unnamed_addr constant [7 x i8] c"Array2\00", align 1
; PRINT:; Resource Bindings:
; PRINT-NEXT:;
-; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count
-; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; PRINT-NEXT:; Zero texture f16 buf T0 t0 1
-; PRINT-NEXT:; One texture f32 buf T1 t1 1
-; PRINT-NEXT:; Two texture f64 buf T2 t2 1
-; PRINT-NEXT:; Three texture i32 buf T3 t3 1
-; PRINT-NEXT:; Four texture byte r/o T4 t5 1
-; PRINT-NEXT:; Five texture struct r/o T5 t6 1
-; PRINT-NEXT:; Six texture u64 buf T6 t10,space2 1
-; PRINT-NEXT:; Array texture f32 buf T7 t4,space3 100
-; PRINT-NEXT:; Seven texture u64 buf T8 t20,space5 1
+; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count
+; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ---------
+; PRINT-NEXT:; Zero texture f16 buf T0 t0 1
+; PRINT-NEXT:; One texture f32 buf T1 t1 1
+; PRINT-NEXT:; Two texture f64 buf T2 t2 1
+; PRINT-NEXT:; Three texture i32 buf T3 t3 1
+; PRINT-NEXT:; Four texture byte r/o T4 t5 1
+; PRINT-NEXT:; Five texture struct r/o T5 t6 1
+; PRINT-NEXT:; Six texture u64 buf T6 t10,space2 1
+; PRINT-NEXT:; Array texture f32 buf T7 t4,space3 100
+; PRINT-NEXT:; Array2 texture f64 buf T8 t2,space4 unbounded
+; PRINT-NEXT:; Seven texture u64 buf T9 t20,space5 1
;
define void @test() #0 {
@@ -60,19 +62,28 @@ define void @test() #0 {
@llvm.dx.resource.handlefrombinding(i32 2, i32 10, i32 1, i32 0, i1 false, ptr @Six.str)
; Same buffer type as Six - should have the same type in metadata
- ; Buffer<double> Seven : register(t10, space2);
+ ; Buffer<double> Seven : register(t20, space5);
%Seven_h = call target("dx.TypedBuffer", i64, 0, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 5, i32 20, i32 1, i32 0, i1 false, ptr @Seven.str)
; Buffer<float4> Array[100] : register(t4, space3);
; Buffer<float4> B1 = Array[30];
- ; Buffer<float4> B1 = Array[42];
+ ; Buffer<float4> B2 = Array[42];
; resource array accesses should produce one metadata entry
%Array_30_h = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 30, i1 false, ptr @Array.str)
%Array_42_h = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 42, i1 false, ptr @Array.str)
+ ; test unbounded resource array
+ ; Buffer<double> Array2[] : register(t2, space4);
+ ; Buffer<double> C1 = Array[10];
+ ; Buffer<double> C2 = Array[20];
+ %Array2_10_h = call target("dx.TypedBuffer", double, 0, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 10, i1 false, ptr @Array2.str)
+ %Array2_20_h = call target("dx.TypedBuffer", double, 0, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 20, i1 false, ptr @Array2.str)
+
ret void
}
@@ -94,7 +105,8 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: @Four = external constant %ByteAddressBuffer
; CHECK: @Five = external constant %"StructuredBuffer<int16_t>"
; CHECK: @Six = external constant %"Buffer<uint32_t>"
-; CHECK: @Array = external constant %"Buffer<float4>"
+; CHECK: @Array = external constant [100 x %"Buffer<float4>"]
+; CHECK: @Array2 = external constant [0 x %"Buffer<double>"]
; CHECK: @Seven = external constant %"Buffer<uint32_t>"
; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]}
@@ -102,7 +114,7 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: [[ResList]] = !{[[SRVList:[!][0-9]+]], null, null, null}
; CHECK: [[SRVList]] = !{![[Zero:[0-9]+]], ![[One:[0-9]+]], ![[Two:[0-9]+]],
; CHECK-SAME: ![[Three:[0-9]+]], ![[Four:[0-9]+]], ![[Five:[0-9]+]],
-; CHECK-SAME: ![[Six:[0-9]+]], ![[Array:[0-9]+]], ![[Seven:[0-9]+]]}
+; CHECK-SAME: ![[Six:[0-9]+]], ![[Array:[0-9]+]], ![[Array2:[0-9]+]], ![[Seven:[0-9]+]]}
; CHECK: ![[Zero]] = !{i32 0, ptr @Zero, !"Zero", i32 0, i32 0, i32 1, i32 10, i32 0, ![[Half:[0-9]+]]}
; CHECK: ![[Half]] = !{i32 0, i32 8}
@@ -118,4 +130,5 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: ![[Six]] = !{i32 6, ptr @Six, !"Six", i32 2, i32 10, i32 1, i32 10, i32 0, ![[U64:[0-9]+]]}
; CHECK: ![[U64]] = !{i32 0, i32 7}
; CHECK: ![[Array]] = !{i32 7, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i32 0, ![[Float]]}
-; CHECK: ![[Seven]] = !{i32 8, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U64]]}
+; CHECK: ![[Array2]] = !{i32 8, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i32 0, ![[Double]]}
+; CHECK: ![[Seven]] = !{i32 9, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U64]]}
diff --git a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll
index 9893f8b..4928b1d 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
-; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
+; RUN: llc %s --filetype=asm -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.6-compute"
@@ -17,23 +17,25 @@ target triple = "dxil-pc-shadermodel6.6-compute"
@Nine.str = private unnamed_addr constant [5 x i8] c"Nine\00", align 1
@Ten.str = private unnamed_addr constant [4 x i8] c"Ten\00", align 1
@Array.str = private unnamed_addr constant [6 x i8] c"Array\00", align 1
+@Array2.str = private unnamed_addr constant [7 x i8] c"Array2\00", align 1
; PRINT:; Resource Bindings:
; PRINT-NEXT:;
-; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count
-; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; PRINT-NEXT:; Zero UAV f16 buf U0 u0 1
-; PRINT-NEXT:; One UAV f32 buf U1 u1 1
-; PRINT-NEXT:; Two UAV f64 buf U2 u2 1
-; PRINT-NEXT:; Three UAV i32 buf U3 u3 1
-; PRINT-NEXT:; Four UAV byte r/w U4 u5 1
-; PRINT-NEXT:; Five UAV struct r/w U5 u6 1
-; PRINT-NEXT:; Six UAV i32 buf U6 u7 1
-; PRINT-NEXT:; Seven UAV struct r/w U7 u8 1
-; PRINT-NEXT:; Eight UAV byte r/w U8 u9 1
-; PRINT-NEXT:; Nine UAV u64 buf U9 u10,space2 1
-; PRINT-NEXT:; Array UAV f32 buf U10 u4,space3 100
-; PRINT-NEXT:; Ten UAV u64 buf U11 u22,space5 1
+; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count
+; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ---------
+; PRINT-NEXT:; Zero UAV f16 buf U0 u0 1
+; PRINT-NEXT:; One UAV f32 buf U1 u1 1
+; PRINT-NEXT:; Two UAV f64 buf U2 u2 1
+; PRINT-NEXT:; Three UAV i32 buf U3 u3 1
+; PRINT-NEXT:; Four UAV byte r/w U4 u5 1
+; PRINT-NEXT:; Five UAV struct r/w U5 u6 1
+; PRINT-NEXT:; Six UAV i32 buf U6 u7 1
+; PRINT-NEXT:; Seven UAV struct r/w U7 u8 1
+; PRINT-NEXT:; Eight UAV byte r/w U8 u9 1
+; PRINT-NEXT:; Nine UAV u64 buf U9 u10,space2 1
+; PRINT-NEXT:; Array UAV f32 buf U10 u4,space3 100
+; PRINT-NEXT:; Array2 UAV f64 buf U11 u2,space4 unbounded
+; PRINT-NEXT:; Ten UAV u64 buf U12 u22,space5 1
define void @test() #0 {
; RWBuffer<half4> Zero : register(u0)
@@ -78,13 +80,22 @@ define void @test() #0 {
; RWBuffer<float4> Array[100] : register(u4, space3);
; RWBuffer<float4> B1 = Array[30];
- ; RWBuffer<float4> B1 = Array[42];
+ ; RWBuffer<float4> B2 = Array[42];
; resource array accesses should produce one metadata entry
%Array_30_h = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 30, i1 false, ptr @Array.str)
%Array_42_h = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 42, i1 false, ptr @Array.str)
+ ; test unbounded resource array
+ ; RWBuffer<double> Array2[] : register(u2, space4);
+ ; RWBuffer<double> C1 = Array[10];
+ ; RWBuffer<double> C2 = Array[20];
+ %Array2_10_h = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 10, i1 false, ptr @Array2.str)
+ %Array2_20_h = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 20, i1 false, ptr @Array2.str)
+
; Same buffer type as Nine - should have the same type in metadata
; RWBuffer<double> Ten : register(u2);
%Ten_h = call target("dx.TypedBuffer", i64, 1, 0, 0)
@@ -117,7 +128,8 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: @Seven = external constant %"RasterizerOrderedStructuredBuffer<int32_t4>"
; CHECK: @Eight = external constant %RasterizerOrderedByteAddressBuffer
; CHECK: @Nine = external constant %"RWBuffer<uint32_t>"
-; CHECK: @Array = external constant %"RWBuffer<float4>"
+; CHECK: @Array = external constant [100 x %"RWBuffer<float4>"]
+; CHECK: @Array2 = external constant [0 x %"RWBuffer<double>"]
; CHECK: @Ten = external constant %"RWBuffer<uint32_t>"
; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]}
@@ -126,7 +138,7 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: [[UAVList]] = !{![[Zero:[0-9]+]], ![[One:[0-9]+]], ![[Two:[0-9]+]],
; CHECK-SAME: ![[Three:[0-9]+]], ![[Four:[0-9]+]], ![[Five:[0-9]+]],
; CHECK-SAME: ![[Six:[0-9]+]], ![[Seven:[0-9]+]], ![[Eight:[0-9]+]],
-; CHECK-SAME: ![[Nine:[0-9]+]], ![[Array:[0-9]+]], ![[Ten:[0-9]+]]}
+; CHECK-SAME: ![[Nine:[0-9]+]], ![[Array:[0-9]+]], ![[Array2:[0-9]+]], ![[Ten:[0-9]+]]}
; CHECK: ![[Zero]] = !{i32 0, ptr @Zero, !"Zero", i32 0, i32 0, i32 1, i32 10, i1 false, i1 false, i1 false, ![[Half:[0-9]+]]}
; CHECK: ![[Half]] = !{i32 0, i32 8}
@@ -146,4 +158,5 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: ![[Nine]] = !{i32 9, ptr @Nine, !"Nine", i32 2, i32 10, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]}
; CHECK: ![[U64]] = !{i32 0, i32 7}
; CHECK: ![[Array]] = !{i32 10, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i1 false, i1 false, i1 false, ![[Float]]}
-; CHECK: ![[Ten]] = !{i32 11, ptr @Ten, !"Ten", i32 5, i32 22, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]}
+; CHECK: ![[Array2]] = !{i32 11, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i1 false, i1 false, i1 false, ![[Double]]}
+; CHECK: ![[Ten]] = !{i32 12, ptr @Ten, !"Ten", i32 5, i32 22, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]}
diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll
index 736c86e..5cf4fe8 100644
--- a/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll
+++ b/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll
@@ -15,16 +15,16 @@ target triple = "dxil-pc-shadermodel6.7-library"
define void @lifetimes() #0 {
%a = alloca [4 x i32], align 8
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %a)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %a)
+ call void @llvm.lifetime.start.p0(ptr nonnull %a)
+ call void @llvm.lifetime.end.p0(ptr nonnull %a)
ret void
}
; Function Attrs: nounwind memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0(i64, ptr) #1
+declare void @llvm.lifetime.start.p0(ptr) #1
; Function Attrs: nounwind memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0(i64, ptr) #1
+declare void @llvm.lifetime.end.p0(ptr) #1
attributes #0 = { convergent norecurse nounwind "hlsl.export"}
attributes #1 = { nounwind memory(argmem: readwrite) }
diff --git a/llvm/test/CodeGen/DirectX/dot2add.ll b/llvm/test/CodeGen/DirectX/dot2add.ll
index 3a2bbcc..5e1cf40 100644
--- a/llvm/test/CodeGen/DirectX/dot2add.ll
+++ b/llvm/test/CodeGen/DirectX/dot2add.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.4-compute %s | FileCheck %s
define noundef float @dot2add_simple(<2 x half> noundef %a, <2 x half> noundef %b, float %acc) {
entry:
@@ -7,7 +7,7 @@ entry:
%bx = extractelement <2 x half> %b, i32 0
%by = extractelement <2 x half> %b, i32 1
-; CHECK: call float @dx.op.dot2AddHalf(i32 162, float %acc, half %ax, half %ay, half %bx, half %by)
+; CHECK: call float @dx.op.dot2AddHalf.f32(i32 162, float %acc, half %ax, half %ay, half %bx, half %by)
%ret = call float @llvm.dx.dot2add(float %acc, half %ax, half %ay, half %bx, half %by)
ret float %ret
}
diff --git a/llvm/test/CodeGen/DirectX/dot2add_error.ll b/llvm/test/CodeGen/DirectX/dot2add_error.ll
new file mode 100644
index 0000000..c45133c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/dot2add_error.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s 2>&1 | FileCheck %s
+
+; CHECK: in function f
+; CHECK-SAME: Cannot create Dot2AddHalf operation: No valid overloads for DXIL version 1.3
+
+define noundef float @f(<2 x half> noundef %a, <2 x half> noundef %b, float %acc) {
+entry:
+ %ax = extractelement <2 x half> %a, i32 0
+ %ay = extractelement <2 x half> %a, i32 1
+ %bx = extractelement <2 x half> %b, i32 0
+ %by = extractelement <2 x half> %b, i32 1
+
+ %ret = call float @llvm.dx.dot2add(float %acc, half %ax, half %ay, half %bx, half %by)
+ ret float %ret
+}
diff --git a/llvm/test/CodeGen/DirectX/finalize_linkage.ll b/llvm/test/CodeGen/DirectX/finalize_linkage.ll
index dc1140f..f6e54cf 100644
--- a/llvm/test/CodeGen/DirectX/finalize_linkage.ll
+++ b/llvm/test/CodeGen/DirectX/finalize_linkage.ll
@@ -4,7 +4,8 @@
target triple = "dxilv1.5-pc-shadermodel6.5-compute"
; DXILFinalizeLinkage changes linkage of all functions that are hidden to
-; internal, and converts private global variables to internal linkage.
+; internal, converts private globals to internal linkage, and converts external globals
+; with no usage to internal linkage.
; CHECK: @switch.table = internal unnamed_addr constant [4 x i32]
@switch.table = private unnamed_addr constant [4 x i32] [i32 1, i32 257, i32 65793, i32 16843009], align 4
@@ -27,6 +28,27 @@ target triple = "dxilv1.5-pc-shadermodel6.5-compute"
; CHECK: @hidden_var = hidden global i32
@hidden_var = hidden global i32 1, align 4
+; Running the whole pipeline should remove unused global variables
+
+; CHECK: @aTile = internal addrspace(3) global
+; CHECK-LLC-NOT: @aTile
+@aTile = hidden addrspace(3) global [4 x [1 x i32]] zeroinitializer, align 4
+
+; CHECK: @bTile = internal addrspace(3) global
+; CHECK-LLC-NOT: @bTile
+@bTile = hidden addrspace(3) global [1 x [1 x i32]] zeroinitializer, align 4
+
+define void @anchor_function() #0 {
+entry:
+ %0 = load i32, ptr @switch.table, align 4
+ %1 = load [3 x float], ptr @private_array, align 4
+ %2 = load i32, ptr @private_var, align 4
+ %3 = load i32, ptr @internal_var, align 4
+ %4 = load i32, ptr @external_var, align 4
+ %5 = load i32, ptr @hidden_var, align 4
+ ret void
+}
+
; CHECK-NOT: define internal void @"?f1@@YAXXZ"()
define void @"?f1@@YAXXZ"() #0 {
entry:
diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
index 7c0813b..ce5c2d7 100644
--- a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
+++ b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
@@ -1,5 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
+; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s --check-prefixes=CHECK,FHCHECK
+; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK
+
+; Note: test to confirm fix for issues: 140819 & 151764
%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
@global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
@@ -11,11 +13,11 @@
define void @CSMain() local_unnamed_addr {
; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+; FHCHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
-; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
+; FHCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
+; FHCHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
; CHECK-NEXT: ret void
@@ -25,9 +27,11 @@ entry:
%handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
%val = load i32, ptr @global, align 4
+ call void @llvm.lifetime.start.p0(ptr nonnull %alloca)
store i32 %val , ptr %alloca, align 8
%indirect = load target("dx.RawBuffer", i32, 1, 0), ptr %alloca, align 8
%buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
store i32 0, ptr %buff, align 4
+ call void @llvm.lifetime.end.p0(ptr nonnull %alloca)
ret void
}
diff --git a/llvm/test/CodeGen/DirectX/imad.ll b/llvm/test/CodeGen/DirectX/imad.ll
index 5d9463d..2e612f0 100644
--- a/llvm/test/CodeGen/DirectX/imad.ll
+++ b/llvm/test/CodeGen/DirectX/imad.ll
@@ -1,17 +1,13 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
-; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
-; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
-
-; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.7-library"
; Function Attrs: noinline nounwind optnone
define noundef i16 @imad_short(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
entry:
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
%p2.addr = alloca i16, align 2
%p1.addr = alloca i16, align 2
%p0.addr = alloca i16, align 2
@@ -31,6 +27,7 @@ declare i16 @llvm.dx.imad.i16(i16, i16, i16) #1
; Function Attrs: noinline nounwind optnone
define noundef i32 @imad_int(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
entry:
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i32, align 4
%p1.addr = alloca i32, align 4
%p0.addr = alloca i32, align 4
@@ -50,6 +47,7 @@ declare i32 @llvm.dx.imad.i32(i32, i32, i32) #1
; Function Attrs: noinline nounwind optnone
define noundef i64 @imad_int64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
entry:
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i64, align 8
%p1.addr = alloca i64, align 8
%p0.addr = alloca i64, align 8
@@ -65,3 +63,95 @@ entry:
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare i64 @llvm.dx.imad.i64(i64, i64, i64) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i16> @imad_int16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i16> %p0, i64 0
+ ; CHECK: extractelement <4 x i16> %p1, i64 0
+ ; CHECK: extractelement <4 x i16> %p2, i64 0
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 1
+ ; CHECK: extractelement <4 x i16> %p1, i64 1
+ ; CHECK: extractelement <4 x i16> %p2, i64 1
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 2
+ ; CHECK: extractelement <4 x i16> %p1, i64 2
+ ; CHECK: extractelement <4 x i16> %p2, i64 2
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 3
+ ; CHECK: extractelement <4 x i16> %p1, i64 3
+ ; CHECK: extractelement <4 x i16> %p2, i64 3
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
+ %dx.imad = call <4 x i16> @llvm.dx.imad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2)
+ ret <4 x i16> %dx.imad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i16> @llvm.dx.imad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i32> @imad_int4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i32> %p0, i64 0
+ ; CHECK: extractelement <4 x i32> %p1, i64 0
+ ; CHECK: extractelement <4 x i32> %p2, i64 0
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 1
+ ; CHECK: extractelement <4 x i32> %p1, i64 1
+ ; CHECK: extractelement <4 x i32> %p2, i64 1
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 2
+ ; CHECK: extractelement <4 x i32> %p1, i64 2
+ ; CHECK: extractelement <4 x i32> %p2, i64 2
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 3
+ ; CHECK: extractelement <4 x i32> %p1, i64 3
+ ; CHECK: extractelement <4 x i32> %p2, i64 3
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
+ %dx.imad = call <4 x i32> @llvm.dx.imad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2)
+ ret <4 x i32> %dx.imad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i32> @llvm.dx.imad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i64> @imad_int64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i64> %p0, i64 0
+ ; CHECK: extractelement <4 x i64> %p1, i64 0
+ ; CHECK: extractelement <4 x i64> %p2, i64 0
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 1
+ ; CHECK: extractelement <4 x i64> %p1, i64 1
+ ; CHECK: extractelement <4 x i64> %p2, i64 1
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 2
+ ; CHECK: extractelement <4 x i64> %p1, i64 2
+ ; CHECK: extractelement <4 x i64> %p2, i64 2
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 3
+ ; CHECK: extractelement <4 x i64> %p1, i64 3
+ ; CHECK: extractelement <4 x i64> %p2, i64 3
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
+ %dx.imad = call <4 x i64> @llvm.dx.imad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2)
+ ret <4 x i64> %dx.imad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i64> @llvm.dx.imad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll
index e485fa2..b1eea30 100644
--- a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll
@@ -11,9 +11,9 @@
define void @test_legal_lifetime() {
%accum.i.flat = alloca [1 x i32], align 4
%gep = getelementptr i32, ptr %accum.i.flat, i32 0
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
+ call void @llvm.lifetime.start.p0(ptr nonnull %accum.i.flat)
store i32 0, ptr %gep, align 4
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
+ call void @llvm.lifetime.end.p0(ptr nonnull %accum.i.flat)
ret void
}
diff --git a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll
index 77133eb..256fcc0 100644
--- a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll
@@ -13,12 +13,12 @@
; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[ACCUM_I_FLAT]], i32 0
; CHECK-SM63-NEXT: store [1 x i32] undef, ptr [[ACCUM_I_FLAT]], align 4
-; CHECK-SM66-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
+; CHECK-SM66-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-EMBED-NOT: bitcast
; CHECK-EMBED-NOT: lifetime
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-SM63-NEXT: store [1 x i32] undef, ptr [[ACCUM_I_FLAT]], align 4
-; CHECK-SM66-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
+; CHECK-SM66-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-EMBED-NOT: bitcast
; CHECK-EMBED-NOT: lifetime
; CHECK-NEXT: ret void
@@ -26,9 +26,9 @@
define void @test_legal_lifetime() {
%accum.i.flat = alloca [1 x i32], align 4
%gep = getelementptr i32, ptr %accum.i.flat, i32 0
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
+ call void @llvm.lifetime.start.p0(ptr nonnull %accum.i.flat)
store i32 0, ptr %gep, align 4
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
+ call void @llvm.lifetime.end.p0(ptr nonnull %accum.i.flat)
ret void
}
diff --git a/llvm/test/CodeGen/DirectX/legalize-memset.ll b/llvm/test/CodeGen/DirectX/legalize-memset.ll
index a73e737..ad45ac6 100644
--- a/llvm/test/CodeGen/DirectX/legalize-memset.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-memset.ll
@@ -5,18 +5,14 @@ define void @replace_float_memset_test() #0 {
; CHECK-LABEL: define void @replace_float_memset_test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x float], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x float], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP]], align 4
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x float], ptr [[ACCUM_I_FLAT]], i32 0, i32 1
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP1]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: ret void
;
%accum.i.flat = alloca [2 x float], align 4
- call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %accum.i.flat)
call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 8, i1 false)
- call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %accum.i.flat)
ret void
}
@@ -24,18 +20,14 @@ define void @replace_half_memset_test() #0 {
; CHECK-LABEL: define void @replace_half_memset_test(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x half], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x half], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
; CHECK-NEXT: store half 0xH0000, ptr [[GEP]], align 2
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x half], ptr [[ACCUM_I_FLAT]], i32 0, i32 1
; CHECK-NEXT: store half 0xH0000, ptr [[GEP1]], align 2
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: ret void
;
%accum.i.flat = alloca [2 x half], align 4
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 4, i1 false)
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
ret void
}
@@ -43,18 +35,14 @@ define void @replace_double_memset_test() #0 {
; CHECK-LABEL: define void @replace_double_memset_test(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x double], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x double], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x double], ptr [[ACCUM_I_FLAT]], i32 0, i32 1
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP1]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: ret void
;
%accum.i.flat = alloca [2 x double], align 4
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %accum.i.flat)
call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %accum.i.flat)
ret void
}
@@ -62,18 +50,14 @@ define void @replace_int16_memset_test() #0 {
; CHECK-LABEL: define void @replace_int16_memset_test(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[CACHE_I:%.*]] = alloca [2 x i16], align 2
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CACHE_I]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i16], ptr [[CACHE_I]], i32 0, i32 0
; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x i16], ptr [[CACHE_I]], i32 0, i32 1
; CHECK-NEXT: store i16 0, ptr [[GEP1]], align 2
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[CACHE_I]])
; CHECK-NEXT: ret void
;
%cache.i = alloca [2 x i16], align 2
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cache.i)
call void @llvm.memset.p0.i32(ptr nonnull align 2 dereferenceable(4) %cache.i, i8 0, i32 4, i1 false)
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cache.i)
ret void
}
@@ -81,16 +65,12 @@ define void @replace_int_memset_test() #0 {
; CHECK-LABEL: define void @replace_int_memset_test(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [1 x i32], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: ret void
;
%accum.i.flat = alloca [1 x i32], align 4
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 4, i1 false)
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
ret void
}
@@ -101,25 +81,19 @@ define void @replace_int_memset_to_var_test() #0 {
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 1, ptr [[I]], align 4
; CHECK-NEXT: [[I8_LOAD:%.*]] = load i32, ptr [[I]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [1 x i32], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
; CHECK-NEXT: store i32 [[I8_LOAD]], ptr [[GEP]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
; CHECK-NEXT: ret void
;
%accum.i.flat = alloca [1 x i32], align 4
%i = alloca i8, align 4
store i8 1, ptr %i
%i8.load = load i8, ptr %i
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 %i8.load, i32 4, i1 false)
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
ret void
}
attributes #0 = {"hlsl.export"}
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none))
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none))
declare void @llvm.memset.p0.i32(ptr writeonly captures(none), i8, i32, i1 immarg)
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 151603a..13c2539 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -7,13 +7,14 @@
; CHECK-NEXT: Target Library Information
; CHECK-NEXT: DXIL Resource Type Analysis
; CHECK-NEXT: Target Transform Information
-
+; CHECK-NEXT: Assumption Cache Tracker
; CHECK-OBJ-NEXT: Machine Module Information
; CHECK-OBJ-NEXT: Machine Branch Probability Analysis
; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: DXIL Finalize Linkage
+; CHECK-NEXT: Dead Global Elimination
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: DXIL Resource Access
; CHECK-NEXT: DXIL Intrinsic Expansion
@@ -26,6 +27,13 @@
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: DXIL Forward Handle Accesses
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT: Function Alias Analysis Results
+; CHECK-NEXT: Post-Dominator Tree Construction
+; CHECK-NEXT: Memory SSA
+; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: DXIL Legalizer
; CHECK-NEXT: DXIL Resource Binding Analysis
; CHECK-NEXT: DXIL Resource Implicit Binding
@@ -33,9 +41,9 @@
; CHECK-NEXT: DXIL Module Metadata analysis
; CHECK-NEXT: DXIL Shader Flag Analysis
; CHECK-NEXT: DXIL Translate Metadata
+; CHECK-NEXT: DXIL Root Signature Analysis
; CHECK-NEXT: DXIL Post Optimization Validation
; CHECK-NEXT: DXIL Op Lowering
-; CHECK-NEXT: DXIL Root Signature Analysis
; CHECK-NEXT: DXIL Prepare Module
; CHECK-ASM-NEXT: DXIL Metadata Pretty Printer
diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-cbuffer-range.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-cbuffer-range.ll
new file mode 100644
index 0000000..e420225
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-cbuffer-range.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s
+; CHECK: error: resource CBV (space=0, registers=[2, 2]) overlaps with resource CBV (space=0, registers=[0, 2])
+
+define void @CSMain() "hlsl.shader"="compute" {
+entry:
+ ret void
+}
+
+!dx.rootsignatures = !{!0}
+
+!0 = !{ptr @CSMain, !1, i32 2}
+!1 = !{!2, !3}
+!2 = !{!"RootConstants", i32 0, i32 2, i32 0, i32 4}
+!3 = !{!"DescriptorTable", i32 0, !4}
+!4 = !{!"CBV", i32 3, i32 0, i32 0, i32 -1, i32 4}
diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-descriptor-table-range.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-descriptor-table-range.ll
new file mode 100644
index 0000000..037f8c7
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-descriptor-table-range.ll
@@ -0,0 +1,16 @@
+; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s
+; CHECK: error: resource UAV (space=10, registers=[4294967295, 4294967295]) overlaps with resource UAV (space=10, registers=[4294967295, 4294967295])
+
+define void @CSMain() "hlsl.shader"="compute" {
+entry:
+ ret void
+}
+
+!dx.rootsignatures = !{!0}
+
+!0 = !{ptr @CSMain, !1, i32 2}
+!1 = !{!2, !4}
+!2 = !{!"DescriptorTable", i32 0, !3}
+!3 = !{!"UAV", i32 -1, i32 -1, i32 10, i32 -1, i32 2}
+!4 = !{!"DescriptorTable", i32 0, !5}
+!5 = !{ !"UAV", i32 -1, i32 -1, i32 10, i32 5, i32 2 }
diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-root-descriptor-range.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-root-descriptor-range.ll
new file mode 100644
index 0000000..7098efb
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-root-descriptor-range.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s
+; CHECK: error: resource UAV (space=1, registers=[3, 3]) overlaps with resource UAV (space=1, registers=[0, 3])
+
+define void @CSMain() "hlsl.shader"="compute" {
+entry:
+ ret void
+}
+
+!dx.rootsignatures = !{!0}
+
+!0 = !{ptr @CSMain, !1, i32 2}
+!1 = !{!2, !4}
+!2 = !{!"RootUAV", i32 0, i32 3, i32 1, i32 4}
+!4 = !{!"DescriptorTable", i32 0, !5}
+!5 = !{!"UAV", i32 4, i32 0, i32 1, i32 -1, i32 2}
diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-sampler.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-sampler.ll
new file mode 100644
index 0000000..c244095
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-sampler.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s
+; CHECK: error: resource Sampler (space=0, registers=[42, 42]) overlaps with resource Sampler (space=0, registers=[42, 42])
+
+define void @CSMain() "hlsl.shader"="compute" {
+entry:
+ ret void
+}
+
+!dx.rootsignatures = !{!0}
+
+!0 = !{ptr @CSMain, !1, i32 2}
+!1 = !{!2, !3}
+!2 = !{ !"StaticSampler", i32 5, i32 4, i32 5, i32 3, float 0x3FF7CCCCC0000000, i32 10, i32 2, i32 1, float -1.270000e+02, float 1.220000e+02, i32 42, i32 0, i32 0 }
+!3 = !{!"DescriptorTable", i32 0, !4}
+!4 = !{!"Sampler", i32 1, i32 42, i32 0, i32 -1, i32 0}
diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-static-sampler-range.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-static-sampler-range.ll
new file mode 100644
index 0000000..9ac02ebb
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-static-sampler-range.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s
+; CHECK: error: resource Sampler (space=0, registers=[42, 42]) overlaps with resource Sampler (space=0, registers=[42, 42])
+
+define void @CSMain() "hlsl.shader"="compute" {
+entry:
+ ret void
+}
+
+!dx.rootsignatures = !{!0}
+
+!0 = !{ptr @CSMain, !1, i32 2}
+!1 = !{!2, !3}
+!2 = !{ !"StaticSampler", i32 5, i32 4, i32 5, i32 3, float 0x3FF7CCCCC0000000, i32 10, i32 2, i32 1, float -1.270000e+02, float 1.220000e+02, i32 42, i32 0, i32 0 }
+!3 = !{ !"StaticSampler", i32 4, i32 2, i32 3, i32 5, float 0x3FF6CCCCC0000000, i32 9, i32 3, i32 2, float -1.280000e+02, float 1.280000e+02, i32 42, i32 0, i32 0 }
diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation.ll
new file mode 100644
index 0000000..0fdba27
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rootsignature-validation.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s
+; We have a valid root signature, this should compile successfully
+
+define void @CSMain() "hlsl.shader"="compute" {
+entry:
+ ret void
+}
+
+!dx.rootsignatures = !{!0}
+
+!0 = !{ptr @CSMain, !1, i32 2}
+!1 = !{!2, !3, !5, !7, !9}
+!2 = !{!"RootCBV", i32 0, i32 3, i32 1, i32 4}
+!9 = !{!"RootConstants", i32 0, i32 2, i32 0, i32 4}
+!3 = !{!"DescriptorTable", i32 0, !4}
+!4 = !{!"SRV", i32 1, i32 0, i32 0, i32 -1, i32 0}
+!5 = !{!"DescriptorTable", i32 0, !6}
+!6 = !{!"Sampler", i32 5, i32 3, i32 2, i32 -1, i32 0}
+!7 = !{!"DescriptorTable", i32 0, !8}
+!8 = !{!"UAV", i32 -1, i32 0, i32 0, i32 -1, i32 2}
diff --git a/llvm/test/CodeGen/DirectX/scalar-data.ll b/llvm/test/CodeGen/DirectX/scalar-data.ll
index 4861a08..d9c8df9 100644
--- a/llvm/test/CodeGen/DirectX/scalar-data.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-data.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-unknown-shadermodel6.5-compute %s | FileCheck %s
; Make sure we don't touch arrays without vectors and that can recurse and flatten multiple-dimension arrays of vectors
diff --git a/llvm/test/CodeGen/DirectX/umad.ll b/llvm/test/CodeGen/DirectX/umad.ll
index 104d238..76516a2 100644
--- a/llvm/test/CodeGen/DirectX/umad.ll
+++ b/llvm/test/CodeGen/DirectX/umad.ll
@@ -1,17 +1,13 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
-; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
-; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
-
-; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.7-library"
; Function Attrs: noinline nounwind optnone
define noundef i16 @umad_ushort(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
entry:
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
%p2.addr = alloca i16, align 2
%p1.addr = alloca i16, align 2
%p0.addr = alloca i16, align 2
@@ -31,6 +27,7 @@ declare i16 @llvm.dx.umad.i16(i16, i16, i16) #1
; Function Attrs: noinline nounwind optnone
define noundef i32 @umad_uint(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
entry:
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i32, align 4
%p1.addr = alloca i32, align 4
%p0.addr = alloca i32, align 4
@@ -50,6 +47,7 @@ declare i32 @llvm.dx.umad.i32(i32, i32, i32) #1
; Function Attrs: noinline nounwind optnone
define noundef i64 @umad_uint64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
entry:
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
%p2.addr = alloca i64, align 8
%p1.addr = alloca i64, align 8
%p0.addr = alloca i64, align 8
@@ -65,3 +63,95 @@ entry:
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare i64 @llvm.dx.umad.i64(i64, i64, i64) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i16> @umad_uint16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i16> %p0, i64 0
+ ; CHECK: extractelement <4 x i16> %p1, i64 0
+ ; CHECK: extractelement <4 x i16> %p2, i64 0
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 1
+ ; CHECK: extractelement <4 x i16> %p1, i64 1
+ ; CHECK: extractelement <4 x i16> %p2, i64 1
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 2
+ ; CHECK: extractelement <4 x i16> %p1, i64 2
+ ; CHECK: extractelement <4 x i16> %p2, i64 2
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i16> %p0, i64 3
+ ; CHECK: extractelement <4 x i16> %p1, i64 3
+ ; CHECK: extractelement <4 x i16> %p2, i64 3
+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
+ %dx.umad = call <4 x i16> @llvm.dx.umad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2)
+ ret <4 x i16> %dx.umad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i16> @llvm.dx.umad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i32> @umad_uint4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i32> %p0, i64 0
+ ; CHECK: extractelement <4 x i32> %p1, i64 0
+ ; CHECK: extractelement <4 x i32> %p2, i64 0
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 1
+ ; CHECK: extractelement <4 x i32> %p1, i64 1
+ ; CHECK: extractelement <4 x i32> %p2, i64 1
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 2
+ ; CHECK: extractelement <4 x i32> %p1, i64 2
+ ; CHECK: extractelement <4 x i32> %p2, i64 2
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i32> %p0, i64 3
+ ; CHECK: extractelement <4 x i32> %p1, i64 3
+ ; CHECK: extractelement <4 x i32> %p2, i64 3
+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
+ %dx.umad = call <4 x i32> @llvm.dx.umad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2)
+ ret <4 x i32> %dx.umad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i32> @llvm.dx.umad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x i64> @umad_uint64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 {
+entry:
+ ; CHECK: extractelement <4 x i64> %p0, i64 0
+ ; CHECK: extractelement <4 x i64> %p1, i64 0
+ ; CHECK: extractelement <4 x i64> %p2, i64 0
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 1
+ ; CHECK: extractelement <4 x i64> %p1, i64 1
+ ; CHECK: extractelement <4 x i64> %p2, i64 1
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 2
+ ; CHECK: extractelement <4 x i64> %p1, i64 2
+ ; CHECK: extractelement <4 x i64> %p2, i64 2
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: extractelement <4 x i64> %p0, i64 3
+ ; CHECK: extractelement <4 x i64> %p1, i64 3
+ ; CHECK: extractelement <4 x i64> %p2, i64 3
+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+ ; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
+ %dx.umad = call <4 x i64> @llvm.dx.umad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2)
+ ret <4 x i64> %dx.umad
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x i64> @llvm.dx.umad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}