diff options
Diffstat (limited to 'llvm/test/CodeGen/DirectX')
21 files changed, 454 insertions, 95 deletions
diff --git a/llvm/test/CodeGen/DirectX/Binding/binding-overlap-7.ll b/llvm/test/CodeGen/DirectX/Binding/binding-overlap-7.ll new file mode 100644 index 0000000..25f81dd --- /dev/null +++ b/llvm/test/CodeGen/DirectX/Binding/binding-overlap-7.ll @@ -0,0 +1,35 @@ +; Use llc for this test so that we don't abort after the first error. +; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s + +; Check that there is no overlap with unbounded array in different space + + ; Buffer<double> A[2] : register(t2, space4); + ; Buffer<double> B : register(t20, space5); // does not overlap + ; Buffer<double> C[] : register(t2, space4); // overlaps with A + +; CHECK: error: resource A at register 2 overlaps with resource C at register 2 in space 4 +; CHECK-NOT: error: resource C at register 2 overlaps with resource B at register 20 in space 5 + +target triple = "dxil-pc-shadermodel6.3-library" + +@A.str = private unnamed_addr constant [2 x i8] c"A\00", align 1 +@B.str = private unnamed_addr constant [2 x i8] c"B\00", align 1 +@C.str = private unnamed_addr constant [2 x i8] c"C\00", align 1 + +define void @test_not_overlapping_in_different_spaces() { +entry: + + ; Buffer<double> A[2] : register(t2, space4); + %h0 = call target("dx.TypedBuffer", double, 0, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 2, i32 10, i1 false, ptr @A.str) + + ; Buffer<double> B : register(t20, space5); + %h1 = call target("dx.TypedBuffer", i64, 0, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 5, i32 20, i32 1, i32 0, i1 false, ptr @B.str) + + ; Buffer<double> C[] : register(t2, space4); + %h2 = call target("dx.TypedBuffer", double, 0, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 10, i1 false, ptr @C.str) + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll index abab5c9..86d69ab 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll +++ 
b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll @@ -1,6 +1,6 @@ ; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s ; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT -; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT +; RUN: llc %s --filetype=asm -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-pc-shadermodel6.6-compute" @@ -14,20 +14,22 @@ target triple = "dxil-pc-shadermodel6.6-compute" @Six.str = private unnamed_addr constant [4 x i8] c"Six\00", align 1 @Seven.str = private unnamed_addr constant [6 x i8] c"Seven\00", align 1 @Array.str = private unnamed_addr constant [6 x i8] c"Array\00", align 1 +@Array2.str = private unnamed_addr constant [7 x i8] c"Array2\00", align 1 ; PRINT:; Resource Bindings: ; PRINT-NEXT:; -; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count -; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ------ -; PRINT-NEXT:; Zero texture f16 buf T0 t0 1 -; PRINT-NEXT:; One texture f32 buf T1 t1 1 -; PRINT-NEXT:; Two texture f64 buf T2 t2 1 -; PRINT-NEXT:; Three texture i32 buf T3 t3 1 -; PRINT-NEXT:; Four texture byte r/o T4 t5 1 -; PRINT-NEXT:; Five texture struct r/o T5 t6 1 -; PRINT-NEXT:; Six texture u64 buf T6 t10,space2 1 -; PRINT-NEXT:; Array texture f32 buf T7 t4,space3 100 -; PRINT-NEXT:; Seven texture u64 buf T8 t20,space5 1 +; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count +; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- --------- +; PRINT-NEXT:; Zero texture f16 buf T0 t0 1 +; PRINT-NEXT:; One texture f32 buf T1 t1 1 +; PRINT-NEXT:; Two texture f64 buf T2 t2 1 +; PRINT-NEXT:; Three texture i32 buf T3 t3 1 +; PRINT-NEXT:; Four texture byte r/o T4 t5 1 +; PRINT-NEXT:; Five texture struct r/o T5 t6 1 +; PRINT-NEXT:; 
Six texture u64 buf T6 t10,space2 1 +; PRINT-NEXT:; Array texture f32 buf T7 t4,space3 100 +; PRINT-NEXT:; Array2 texture f64 buf T8 t2,space4 unbounded +; PRINT-NEXT:; Seven texture u64 buf T9 t20,space5 1 ; define void @test() #0 { @@ -60,19 +62,28 @@ define void @test() #0 { @llvm.dx.resource.handlefrombinding(i32 2, i32 10, i32 1, i32 0, i1 false, ptr @Six.str) ; Same buffer type as Six - should have the same type in metadata - ; Buffer<double> Seven : register(t10, space2); + ; Buffer<double> Seven : register(t20, space5); %Seven_h = call target("dx.TypedBuffer", i64, 0, 0, 0) @llvm.dx.resource.handlefrombinding(i32 5, i32 20, i32 1, i32 0, i1 false, ptr @Seven.str) ; Buffer<float4> Array[100] : register(t4, space3); ; Buffer<float4> B1 = Array[30]; - ; Buffer<float4> B1 = Array[42]; + ; Buffer<float4> B2 = Array[42]; ; resource array accesses should produce one metadata entry %Array_30_h = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) @llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 30, i1 false, ptr @Array.str) %Array_42_h = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) @llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 42, i1 false, ptr @Array.str) + ; test unbounded resource array + ; Buffer<double> Array2[] : register(t2, space4); + ; Buffer<double> C1 = Array2[10]; + ; Buffer<double> C2 = Array2[20]; + %Array2_10_h = call target("dx.TypedBuffer", double, 0, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 10, i1 false, ptr @Array2.str) + %Array2_20_h = call target("dx.TypedBuffer", double, 0, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 20, i1 false, ptr @Array2.str) + ret void } @@ -94,7 +105,8 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: @Four = external constant %ByteAddressBuffer ; CHECK: @Five = external constant %"StructuredBuffer<int16_t>" ; CHECK: @Six = external constant %"Buffer<uint32_t>" -; CHECK: @Array = external constant
%"Buffer<float4>" +; CHECK: @Array = external constant [100 x %"Buffer<float4>"] +; CHECK: @Array2 = external constant [0 x %"Buffer<double>"] ; CHECK: @Seven = external constant %"Buffer<uint32_t>" ; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]} @@ -102,7 +114,7 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: [[ResList]] = !{[[SRVList:[!][0-9]+]], null, null, null} ; CHECK: [[SRVList]] = !{![[Zero:[0-9]+]], ![[One:[0-9]+]], ![[Two:[0-9]+]], ; CHECK-SAME: ![[Three:[0-9]+]], ![[Four:[0-9]+]], ![[Five:[0-9]+]], -; CHECK-SAME: ![[Six:[0-9]+]], ![[Array:[0-9]+]], ![[Seven:[0-9]+]]} +; CHECK-SAME: ![[Six:[0-9]+]], ![[Array:[0-9]+]], ![[Array2:[0-9]+]], ![[Seven:[0-9]+]]} ; CHECK: ![[Zero]] = !{i32 0, ptr @Zero, !"Zero", i32 0, i32 0, i32 1, i32 10, i32 0, ![[Half:[0-9]+]]} ; CHECK: ![[Half]] = !{i32 0, i32 8} @@ -118,4 +130,5 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: ![[Six]] = !{i32 6, ptr @Six, !"Six", i32 2, i32 10, i32 1, i32 10, i32 0, ![[U64:[0-9]+]]} ; CHECK: ![[U64]] = !{i32 0, i32 7} ; CHECK: ![[Array]] = !{i32 7, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i32 0, ![[Float]]} -; CHECK: ![[Seven]] = !{i32 8, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U64]]} +; CHECK: ![[Array2]] = !{i32 8, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i32 0, ![[Double]]} +; CHECK: ![[Seven]] = !{i32 9, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U64]]} diff --git a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll index 9893f8b..4928b1d 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll @@ -1,6 +1,6 @@ ; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s ; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT -; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT +; RUN: 
llc %s --filetype=asm -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-pc-shadermodel6.6-compute" @@ -17,23 +17,25 @@ target triple = "dxil-pc-shadermodel6.6-compute" @Nine.str = private unnamed_addr constant [5 x i8] c"Nine\00", align 1 @Ten.str = private unnamed_addr constant [4 x i8] c"Ten\00", align 1 @Array.str = private unnamed_addr constant [6 x i8] c"Array\00", align 1 +@Array2.str = private unnamed_addr constant [7 x i8] c"Array2\00", align 1 ; PRINT:; Resource Bindings: ; PRINT-NEXT:; -; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count -; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ------ -; PRINT-NEXT:; Zero UAV f16 buf U0 u0 1 -; PRINT-NEXT:; One UAV f32 buf U1 u1 1 -; PRINT-NEXT:; Two UAV f64 buf U2 u2 1 -; PRINT-NEXT:; Three UAV i32 buf U3 u3 1 -; PRINT-NEXT:; Four UAV byte r/w U4 u5 1 -; PRINT-NEXT:; Five UAV struct r/w U5 u6 1 -; PRINT-NEXT:; Six UAV i32 buf U6 u7 1 -; PRINT-NEXT:; Seven UAV struct r/w U7 u8 1 -; PRINT-NEXT:; Eight UAV byte r/w U8 u9 1 -; PRINT-NEXT:; Nine UAV u64 buf U9 u10,space2 1 -; PRINT-NEXT:; Array UAV f32 buf U10 u4,space3 100 -; PRINT-NEXT:; Ten UAV u64 buf U11 u22,space5 1 +; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count +; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- --------- +; PRINT-NEXT:; Zero UAV f16 buf U0 u0 1 +; PRINT-NEXT:; One UAV f32 buf U1 u1 1 +; PRINT-NEXT:; Two UAV f64 buf U2 u2 1 +; PRINT-NEXT:; Three UAV i32 buf U3 u3 1 +; PRINT-NEXT:; Four UAV byte r/w U4 u5 1 +; PRINT-NEXT:; Five UAV struct r/w U5 u6 1 +; PRINT-NEXT:; Six UAV i32 buf U6 u7 1 +; PRINT-NEXT:; Seven UAV struct r/w U7 u8 1 +; PRINT-NEXT:; Eight UAV byte r/w U8 u9 1 +; PRINT-NEXT:; Nine UAV u64 buf U9 u10,space2 1 +; PRINT-NEXT:; Array UAV f32 buf U10 u4,space3 100 +; PRINT-NEXT:; Array2 UAV f64 buf 
U11 u2,space4 unbounded +; PRINT-NEXT:; Ten UAV u64 buf U12 u22,space5 1 define void @test() #0 { ; RWBuffer<half4> Zero : register(u0) @@ -78,13 +80,22 @@ define void @test() #0 { ; RWBuffer<float4> Array[100] : register(u4, space3); ; RWBuffer<float4> B1 = Array[30]; - ; RWBuffer<float4> B1 = Array[42]; + ; RWBuffer<float4> B2 = Array[42]; ; resource array accesses should produce one metadata entry %Array_30_h = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 30, i1 false, ptr @Array.str) %Array_42_h = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding(i32 3, i32 4, i32 100, i32 42, i1 false, ptr @Array.str) + ; test unbounded resource array + ; RWBuffer<double> Array2[] : register(u2, space4); + ; RWBuffer<double> C1 = Array2[10]; + ; RWBuffer<double> C2 = Array2[20]; + %Array2_10_h = call target("dx.TypedBuffer", double, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 10, i1 false, ptr @Array2.str) + %Array2_20_h = call target("dx.TypedBuffer", double, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 -1, i32 20, i1 false, ptr @Array2.str) + ; Same buffer type as Nine - should have the same type in metadata ; RWBuffer<double> Ten : register(u2); %Ten_h = call target("dx.TypedBuffer", i64, 1, 0, 0) @@ -117,7 +128,8 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: @Seven = external constant %"RasterizerOrderedStructuredBuffer<int32_t4>" ; CHECK: @Eight = external constant %RasterizerOrderedByteAddressBuffer ; CHECK: @Nine = external constant %"RWBuffer<uint32_t>" -; CHECK: @Array = external constant %"RWBuffer<float4>" +; CHECK: @Array = external constant [100 x %"RWBuffer<float4>"] +; CHECK: @Array2 = external constant [0 x %"RWBuffer<double>"] ; CHECK: @Ten = external constant %"RWBuffer<uint32_t>" ; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]} @@ -126,7 +138,7 @@ attributes #0 = { noinline
nounwind "hlsl.shader"="compute" } ; CHECK: [[UAVList]] = !{![[Zero:[0-9]+]], ![[One:[0-9]+]], ![[Two:[0-9]+]], ; CHECK-SAME: ![[Three:[0-9]+]], ![[Four:[0-9]+]], ![[Five:[0-9]+]], ; CHECK-SAME: ![[Six:[0-9]+]], ![[Seven:[0-9]+]], ![[Eight:[0-9]+]], -; CHECK-SAME: ![[Nine:[0-9]+]], ![[Array:[0-9]+]], ![[Ten:[0-9]+]]} +; CHECK-SAME: ![[Nine:[0-9]+]], ![[Array:[0-9]+]], ![[Array2:[0-9]+]], ![[Ten:[0-9]+]]} ; CHECK: ![[Zero]] = !{i32 0, ptr @Zero, !"Zero", i32 0, i32 0, i32 1, i32 10, i1 false, i1 false, i1 false, ![[Half:[0-9]+]]} ; CHECK: ![[Half]] = !{i32 0, i32 8} @@ -146,4 +158,5 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: ![[Nine]] = !{i32 9, ptr @Nine, !"Nine", i32 2, i32 10, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]} ; CHECK: ![[U64]] = !{i32 0, i32 7} ; CHECK: ![[Array]] = !{i32 10, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i1 false, i1 false, i1 false, ![[Float]]} -; CHECK: ![[Ten]] = !{i32 11, ptr @Ten, !"Ten", i32 5, i32 22, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]} +; CHECK: ![[Array2]] = !{i32 11, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i1 false, i1 false, i1 false, ![[Double]]} +; CHECK: ![[Ten]] = !{i32 12, ptr @Ten, !"Ten", i32 5, i32 22, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]} diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll index 736c86e..5cf4fe8 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll @@ -15,16 +15,16 @@ target triple = "dxil-pc-shadermodel6.7-library" define void @lifetimes() #0 { %a = alloca [4 x i32], align 8 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %a) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %a) + call void @llvm.lifetime.start.p0(ptr nonnull %a) + call void @llvm.lifetime.end.p0(ptr nonnull %a) ret void } ; Function 
Attrs: nounwind memory(argmem: readwrite) -declare void @llvm.lifetime.start.p0(i64, ptr) #1 +declare void @llvm.lifetime.start.p0(ptr) #1 ; Function Attrs: nounwind memory(argmem: readwrite) -declare void @llvm.lifetime.end.p0(i64, ptr) #1 +declare void @llvm.lifetime.end.p0(ptr) #1 attributes #0 = { convergent norecurse nounwind "hlsl.export"} attributes #1 = { nounwind memory(argmem: readwrite) } diff --git a/llvm/test/CodeGen/DirectX/dot2add.ll b/llvm/test/CodeGen/DirectX/dot2add.ll index 3a2bbcc..5e1cf40 100644 --- a/llvm/test/CodeGen/DirectX/dot2add.ll +++ b/llvm/test/CodeGen/DirectX/dot2add.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.4-compute %s | FileCheck %s
define noundef float @dot2add_simple(<2 x half> noundef %a, <2 x half> noundef %b, float %acc) {
entry:
@@ -7,7 +7,7 @@ entry: %bx = extractelement <2 x half> %b, i32 0
%by = extractelement <2 x half> %b, i32 1
-; CHECK: call float @dx.op.dot2AddHalf(i32 162, float %acc, half %ax, half %ay, half %bx, half %by)
+; CHECK: call float @dx.op.dot2AddHalf.f32(i32 162, float %acc, half %ax, half %ay, half %bx, half %by)
%ret = call float @llvm.dx.dot2add(float %acc, half %ax, half %ay, half %bx, half %by)
ret float %ret
}
diff --git a/llvm/test/CodeGen/DirectX/dot2add_error.ll b/llvm/test/CodeGen/DirectX/dot2add_error.ll new file mode 100644 index 0000000..c45133c --- /dev/null +++ b/llvm/test/CodeGen/DirectX/dot2add_error.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s 2>&1 | FileCheck %s
+; Negative test: with a shader model 6.3 triple, -dxil-op-lower must fail on llvm.dx.dot2add and report the diagnostic below.
+; CHECK: in function f
+; CHECK-SAME: Cannot create Dot2AddHalf operation: No valid overloads for DXIL version 1.3
+
+define noundef float @f(<2 x half> noundef %a, <2 x half> noundef %b, float %acc) {
+entry:
+ %ax = extractelement <2 x half> %a, i32 0
+ %ay = extractelement <2 x half> %a, i32 1
+ %bx = extractelement <2 x half> %b, i32 0
+ %by = extractelement <2 x half> %b, i32 1
+
+ %ret = call float @llvm.dx.dot2add(float %acc, half %ax, half %ay, half %bx, half %by)
+ ret float %ret
+}
diff --git a/llvm/test/CodeGen/DirectX/finalize_linkage.ll b/llvm/test/CodeGen/DirectX/finalize_linkage.ll index dc1140f..f6e54cf 100644 --- a/llvm/test/CodeGen/DirectX/finalize_linkage.ll +++ b/llvm/test/CodeGen/DirectX/finalize_linkage.ll @@ -4,7 +4,8 @@ target triple = "dxilv1.5-pc-shadermodel6.5-compute" ; DXILFinalizeLinkage changes linkage of all functions that are hidden to -; internal, and converts private global variables to internal linkage. +; internal, converts private globals to internal linkage, and converts external globals +; with no usage to internal linkage. ; CHECK: @switch.table = internal unnamed_addr constant [4 x i32] @switch.table = private unnamed_addr constant [4 x i32] [i32 1, i32 257, i32 65793, i32 16843009], align 4 @@ -27,6 +28,27 @@ target triple = "dxilv1.5-pc-shadermodel6.5-compute" ; CHECK: @hidden_var = hidden global i32 @hidden_var = hidden global i32 1, align 4 +; Running the whole pipeline should remove unused global variables + +; CHECK: @aTile = internal addrspace(3) global +; CHECK-LLC-NOT: @aTile +@aTile = hidden addrspace(3) global [4 x [1 x i32]] zeroinitializer, align 4 + +; CHECK: @bTile = internal addrspace(3) global +; CHECK-LLC-NOT: @bTile +@bTile = hidden addrspace(3) global [1 x [1 x i32]] zeroinitializer, align 4 + +define void @anchor_function() #0 { +entry: + %0 = load i32, ptr @switch.table, align 4 + %1 = load [3 x float], ptr @private_array, align 4 + %2 = load i32, ptr @private_var, align 4 + %3 = load i32, ptr @internal_var, align 4 + %4 = load i32, ptr @external_var, align 4 + %5 = load i32, ptr @hidden_var, align 4 + ret void +} + ; CHECK-NOT: define internal void @"?f1@@YAXXZ"() define void @"?f1@@YAXXZ"() #0 { entry: diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll index 7c0813b..ce5c2d7 100644 --- a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll +++ 
b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll @@ -1,5 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s +; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s --check-prefixes=CHECK,FHCHECK +; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK + +; Note: test to confirm fix for issues: 140819 & 151764 %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) } @global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4 @@ -11,11 +13,11 @@ define void @CSMain() local_unnamed_addr { ; CHECK-LABEL: define void @CSMain() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8 +; FHCHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8 ; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name) ; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4 -; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8 +; FHCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4 +; FHCHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0) ; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -25,9 +27,11 @@ entry: %handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, 
i32 0, i1 false, ptr nonnull @name) store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4 %val = load i32, ptr @global, align 4 + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) store i32 %val , ptr %alloca, align 8 %indirect = load target("dx.RawBuffer", i32, 1, 0), ptr %alloca, align 8 %buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0) store i32 0, ptr %buff, align 4 + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) ret void } diff --git a/llvm/test/CodeGen/DirectX/imad.ll b/llvm/test/CodeGen/DirectX/imad.ll index 5d9463d..2e612f0 100644 --- a/llvm/test/CodeGen/DirectX/imad.ll +++ b/llvm/test/CodeGen/DirectX/imad.ll @@ -1,17 +1,13 @@ -; RUN: opt -S -dxil-op-lower < %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s ; Make sure dxil operation function calls for round are generated for float and half. -; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]] -; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] -; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] - -; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}} target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-pc-shadermodel6.7-library" ; Function Attrs: noinline nounwind optnone define noundef i16 @imad_short(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 { entry: + ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]] %p2.addr = alloca i16, align 2 %p1.addr = alloca i16, align 2 %p0.addr = alloca i16, align 2 @@ -31,6 +27,7 @@ declare i16 @llvm.dx.imad.i16(i16, i16, i16) #1 ; Function Attrs: noinline nounwind optnone define noundef i32 @imad_int(i32 noundef %p0, i32 noundef %p1, 
i32 noundef %p2) #0 { entry: + ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] %p2.addr = alloca i32, align 4 %p1.addr = alloca i32, align 4 %p0.addr = alloca i32, align 4 @@ -50,6 +47,7 @@ declare i32 @llvm.dx.imad.i32(i32, i32, i32) #1 ; Function Attrs: noinline nounwind optnone define noundef i64 @imad_int64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 { entry: + ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] %p2.addr = alloca i64, align 8 %p1.addr = alloca i64, align 8 %p0.addr = alloca i64, align 8 @@ -65,3 +63,95 @@ entry: ; Function Attrs: nocallback nofree nosync nounwind willreturn declare i64 @llvm.dx.imad.i64(i64, i64, i64) #1 + +; Function Attrs: noinline nounwind optnone +define noundef <4 x i16> @imad_int16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 { +entry: + ; CHECK: extractelement <4 x i16> %p0, i64 0 + ; CHECK: extractelement <4 x i16> %p1, i64 0 + ; CHECK: extractelement <4 x i16> %p2, i64 0 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i16> %p0, i64 1 + ; CHECK: extractelement <4 x i16> %p1, i64 1 + ; CHECK: extractelement <4 x i16> %p2, i64 1 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i16> %p0, i64 2 + ; CHECK: extractelement <4 x i16> %p1, i64 2 + ; CHECK: extractelement <4 x i16> %p2, i64 2 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i16> %p0, i64 3 + ; CHECK: extractelement <4 x i16> %p1, i64 3 + ; CHECK: extractelement <4 x i16> %p2, i64 3 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0 + ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, 
i64 1 + ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2 + ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3 + %dx.imad = call <4 x i16> @llvm.dx.imad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2) + ret <4 x i16> %dx.imad +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x i16> @llvm.dx.imad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1 + +; Function Attrs: noinline nounwind optnone +define noundef <4 x i32> @imad_int4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 { +entry: + ; CHECK: extractelement <4 x i32> %p0, i64 0 + ; CHECK: extractelement <4 x i32> %p1, i64 0 + ; CHECK: extractelement <4 x i32> %p2, i64 0 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i32> %p0, i64 1 + ; CHECK: extractelement <4 x i32> %p1, i64 1 + ; CHECK: extractelement <4 x i32> %p2, i64 1 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i32> %p0, i64 2 + ; CHECK: extractelement <4 x i32> %p1, i64 2 + ; CHECK: extractelement <4 x i32> %p2, i64 2 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i32> %p0, i64 3 + ; CHECK: extractelement <4 x i32> %p1, i64 3 + ; CHECK: extractelement <4 x i32> %p2, i64 3 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3 + %dx.imad = call <4 x i32> @llvm.dx.imad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2) + ret <4 x i32> %dx.imad +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x i32> @llvm.dx.imad.v4i32(<4 x 
i32>, <4 x i32>, <4 x i32>) #1 + +; Function Attrs: noinline nounwind optnone +define noundef <4 x i64> @imad_int64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 { +entry: + ; CHECK: extractelement <4 x i64> %p0, i64 0 + ; CHECK: extractelement <4 x i64> %p1, i64 0 + ; CHECK: extractelement <4 x i64> %p2, i64 0 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i64> %p0, i64 1 + ; CHECK: extractelement <4 x i64> %p1, i64 1 + ; CHECK: extractelement <4 x i64> %p2, i64 1 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i64> %p0, i64 2 + ; CHECK: extractelement <4 x i64> %p1, i64 2 + ; CHECK: extractelement <4 x i64> %p2, i64 2 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i64> %p0, i64 3 + ; CHECK: extractelement <4 x i64> %p1, i64 3 + ; CHECK: extractelement <4 x i64> %p2, i64 3 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0 + ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1 + ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2 + ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3 + %dx.imad = call <4 x i64> @llvm.dx.imad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2) + ret <4 x i64> %dx.imad +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x i64> @llvm.dx.imad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1 + +; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}} diff --git a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll index e485fa2..b1eea30 100644 --- a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll +++ 
b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.5.ll @@ -11,9 +11,9 @@ define void @test_legal_lifetime() { %accum.i.flat = alloca [1 x i32], align 4 %gep = getelementptr i32, ptr %accum.i.flat, i32 0 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat) + call void @llvm.lifetime.start.p0(ptr nonnull %accum.i.flat) store i32 0, ptr %gep, align 4 - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat) + call void @llvm.lifetime.end.p0(ptr nonnull %accum.i.flat) ret void } diff --git a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll index 77133eb..256fcc0 100644 --- a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll +++ b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll @@ -13,12 +13,12 @@ ; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[ACCUM_I_FLAT]], i32 0 ; CHECK-SM63-NEXT: store [1 x i32] undef, ptr [[ACCUM_I_FLAT]], align 4 -; CHECK-SM66-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) +; CHECK-SM66-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-EMBED-NOT: bitcast ; CHECK-EMBED-NOT: lifetime ; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 ; CHECK-SM63-NEXT: store [1 x i32] undef, ptr [[ACCUM_I_FLAT]], align 4 -; CHECK-SM66-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) +; CHECK-SM66-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-EMBED-NOT: bitcast ; CHECK-EMBED-NOT: lifetime ; CHECK-NEXT: ret void @@ -26,9 +26,9 @@ define void @test_legal_lifetime() { %accum.i.flat = alloca [1 x i32], align 4 %gep = getelementptr i32, ptr %accum.i.flat, i32 0 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat) + call void @llvm.lifetime.start.p0(ptr nonnull %accum.i.flat) store i32 0, ptr %gep, align 4 - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull 
%accum.i.flat) + call void @llvm.lifetime.end.p0(ptr nonnull %accum.i.flat) ret void } diff --git a/llvm/test/CodeGen/DirectX/legalize-memset.ll b/llvm/test/CodeGen/DirectX/legalize-memset.ll index a73e737..ad45ac6 100644 --- a/llvm/test/CodeGen/DirectX/legalize-memset.ll +++ b/llvm/test/CodeGen/DirectX/legalize-memset.ll @@ -5,18 +5,14 @@ define void @replace_float_memset_test() #0 { ; CHECK-LABEL: define void @replace_float_memset_test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x float], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x float], ptr [[ACCUM_I_FLAT]], i32 0, i32 0 ; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP]], align 4 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x float], ptr [[ACCUM_I_FLAT]], i32 0, i32 1 ; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP1]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: ret void ; %accum.i.flat = alloca [2 x float], align 4 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %accum.i.flat) call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 8, i1 false) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %accum.i.flat) ret void } @@ -24,18 +20,14 @@ define void @replace_half_memset_test() #0 { ; CHECK-LABEL: define void @replace_half_memset_test( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x half], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x half], ptr [[ACCUM_I_FLAT]], i32 0, i32 0 ; CHECK-NEXT: store half 0xH0000, ptr [[GEP]], align 2 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x half], ptr [[ACCUM_I_FLAT]], i32 0, i32 1 ; CHECK-NEXT: store half 0xH0000, ptr [[GEP1]], align 2 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull 
[[ACCUM_I_FLAT]]) ; CHECK-NEXT: ret void ; %accum.i.flat = alloca [2 x half], align 4 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat) call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 4, i1 false) - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat) ret void } @@ -43,18 +35,14 @@ define void @replace_double_memset_test() #0 { ; CHECK-LABEL: define void @replace_double_memset_test( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x double], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x double], ptr [[ACCUM_I_FLAT]], i32 0, i32 0 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x double], ptr [[ACCUM_I_FLAT]], i32 0, i32 1 ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP1]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: ret void ; %accum.i.flat = alloca [2 x double], align 4 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %accum.i.flat) call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %accum.i.flat) ret void } @@ -62,18 +50,14 @@ define void @replace_int16_memset_test() #0 { ; CHECK-LABEL: define void @replace_int16_memset_test( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[CACHE_I:%.*]] = alloca [2 x i16], align 2 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CACHE_I]]) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i16], ptr [[CACHE_I]], i32 0, i32 0 ; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [2 x i16], ptr [[CACHE_I]], i32 0, i32 1 ; CHECK-NEXT: store i16 0, ptr [[GEP1]], align 2 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull 
[[CACHE_I]]) ; CHECK-NEXT: ret void ; %cache.i = alloca [2 x i16], align 2 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cache.i) call void @llvm.memset.p0.i32(ptr nonnull align 2 dereferenceable(4) %cache.i, i8 0, i32 4, i1 false) - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cache.i) ret void } @@ -81,16 +65,12 @@ define void @replace_int_memset_test() #0 { ; CHECK-LABEL: define void @replace_int_memset_test( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [1 x i32], ptr [[ACCUM_I_FLAT]], i32 0, i32 0 ; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: ret void ; %accum.i.flat = alloca [1 x i32], align 4 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat) call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 4, i1 false) - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat) ret void } @@ -101,25 +81,19 @@ define void @replace_int_memset_to_var_test() #0 { ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 1, ptr [[I]], align 4 ; CHECK-NEXT: [[I8_LOAD:%.*]] = load i32, ptr [[I]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [1 x i32], ptr [[ACCUM_I_FLAT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[I8_LOAD]], ptr [[GEP]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) ; CHECK-NEXT: ret void ; %accum.i.flat = alloca [1 x i32], align 4 %i = alloca i8, align 4 store i8 1, ptr %i %i8.load = load i8, ptr %i - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat) call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 %i8.load, 
i32 4, i1 false) - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat) ret void } attributes #0 = {"hlsl.export"} -declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) -declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) declare void @llvm.memset.p0.i32(ptr writeonly captures(none), i8, i32, i1 immarg) diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 151603a..13c2539 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -7,13 +7,14 @@ ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: DXIL Resource Type Analysis ; CHECK-NEXT: Target Transform Information - +; CHECK-NEXT: Assumption Cache Tracker ; CHECK-OBJ-NEXT: Machine Module Information ; CHECK-OBJ-NEXT: Machine Branch Probability Analysis ; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Finalize Linkage +; CHECK-NEXT: Dead Global Elimination ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: DXIL Resource Access ; CHECK-NEXT: DXIL Intrinsic Expansion @@ -26,6 +27,13 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: DXIL Forward Handle Accesses +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: DXIL Legalizer ; CHECK-NEXT: DXIL Resource Binding Analysis ; CHECK-NEXT: DXIL Resource Implicit Binding @@ -33,9 +41,9 @@ ; CHECK-NEXT: DXIL Module Metadata analysis ; CHECK-NEXT: DXIL Shader Flag Analysis ; CHECK-NEXT: DXIL Translate Metadata +; CHECK-NEXT: DXIL Root Signature Analysis ; CHECK-NEXT: DXIL Post Optimization Validation ; CHECK-NEXT: DXIL Op Lowering -; CHECK-NEXT: DXIL Root Signature Analysis ; 
CHECK-NEXT: DXIL Prepare Module ; CHECK-ASM-NEXT: DXIL Metadata Pretty Printer diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-cbuffer-range.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-cbuffer-range.ll new file mode 100644 index 0000000..e420225 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-cbuffer-range.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s +; CHECK: error: resource CBV (space=0, registers=[2, 2]) overlaps with resource CBV (space=0, registers=[0, 2]) + +define void @CSMain() "hlsl.shader"="compute" { +entry: + ret void +} + +!dx.rootsignatures = !{!0} + +!0 = !{ptr @CSMain, !1, i32 2} +!1 = !{!2, !3} +!2 = !{!"RootConstants", i32 0, i32 2, i32 0, i32 4} +!3 = !{!"DescriptorTable", i32 0, !4} +!4 = !{!"CBV", i32 3, i32 0, i32 0, i32 -1, i32 4} diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-descriptor-table-range.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-descriptor-table-range.ll new file mode 100644 index 0000000..037f8c7 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-descriptor-table-range.ll @@ -0,0 +1,16 @@ +; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s +; CHECK: error: resource UAV (space=10, registers=[4294967295, 4294967295]) overlaps with resource UAV (space=10, registers=[4294967295, 4294967295]) + +define void @CSMain() "hlsl.shader"="compute" { +entry: + ret void +} + +!dx.rootsignatures = !{!0} + +!0 = !{ptr @CSMain, !1, i32 2} +!1 = !{!2, !4} +!2 = !{!"DescriptorTable", i32 0, !3} +!3 = !{!"UAV", i32 -1, i32 -1, i32 10, i32 -1, i32 2} +!4 = !{!"DescriptorTable", i32 0, !5} +!5 = !{ !"UAV", i32 -1, i32 -1, i32 10, i32 5, i32 2 } diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-root-descriptor-range.ll 
b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-root-descriptor-range.ll new file mode 100644 index 0000000..7098efb --- /dev/null +++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-root-descriptor-range.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s +; CHECK: error: resource UAV (space=1, registers=[3, 3]) overlaps with resource UAV (space=1, registers=[0, 3]) + +define void @CSMain() "hlsl.shader"="compute" { +entry: + ret void +} + +!dx.rootsignatures = !{!0} + +!0 = !{ptr @CSMain, !1, i32 2} +!1 = !{!2, !4} +!2 = !{!"RootUAV", i32 0, i32 3, i32 1, i32 4} +!4 = !{!"DescriptorTable", i32 0, !5} +!5 = !{!"UAV", i32 4, i32 0, i32 1, i32 -1, i32 2} diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-sampler.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-sampler.ll new file mode 100644 index 0000000..c244095 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-sampler.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s +; CHECK: error: resource Sampler (space=0, registers=[42, 42]) overlaps with resource Sampler (space=0, registers=[42, 42]) + +define void @CSMain() "hlsl.shader"="compute" { +entry: + ret void +} + +!dx.rootsignatures = !{!0} + +!0 = !{ptr @CSMain, !1, i32 2} +!1 = !{!2, !3} +!2 = !{ !"StaticSampler", i32 5, i32 4, i32 5, i32 3, float 0x3FF7CCCCC0000000, i32 10, i32 2, i32 1, float -1.270000e+02, float 1.220000e+02, i32 42, i32 0, i32 0 } +!3 = !{!"DescriptorTable", i32 0, !4} +!4 = !{!"Sampler", i32 1, i32 42, i32 0, i32 -1, i32 0} diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-static-sampler-range.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-static-sampler-range.ll new file mode 100644 index 0000000..9ac02ebb --- /dev/null +++ 
b/llvm/test/CodeGen/DirectX/rootsignature-validation-fail-static-sampler-range.ll @@ -0,0 +1,14 @@ +; RUN: not opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s 2>&1 | FileCheck %s +; CHECK: error: resource Sampler (space=0, registers=[42, 42]) overlaps with resource Sampler (space=0, registers=[42, 42]) + +define void @CSMain() "hlsl.shader"="compute" { +entry: + ret void +} + +!dx.rootsignatures = !{!0} + +!0 = !{ptr @CSMain, !1, i32 2} +!1 = !{!2, !3} +!2 = !{ !"StaticSampler", i32 5, i32 4, i32 5, i32 3, float 0x3FF7CCCCC0000000, i32 10, i32 2, i32 1, float -1.270000e+02, float 1.220000e+02, i32 42, i32 0, i32 0 } +!3 = !{ !"StaticSampler", i32 4, i32 2, i32 3, i32 5, float 0x3FF6CCCCC0000000, i32 9, i32 3, i32 2, float -1.280000e+02, float 1.280000e+02, i32 42, i32 0, i32 0 } diff --git a/llvm/test/CodeGen/DirectX/rootsignature-validation.ll b/llvm/test/CodeGen/DirectX/rootsignature-validation.ll new file mode 100644 index 0000000..0fdba27 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/rootsignature-validation.ll @@ -0,0 +1,20 @@ +; RUN: opt -S -passes='dxil-post-optimization-validation' -mtriple=dxil-pc-shadermodel6.6-compute %s +; We have a valid root signature, this should compile successfully + +define void @CSMain() "hlsl.shader"="compute" { +entry: + ret void +} + +!dx.rootsignatures = !{!0} + +!0 = !{ptr @CSMain, !1, i32 2} +!1 = !{!2, !3, !5, !7, !9} +!2 = !{!"RootCBV", i32 0, i32 3, i32 1, i32 4} +!9 = !{!"RootConstants", i32 0, i32 2, i32 0, i32 4} +!3 = !{!"DescriptorTable", i32 0, !4} +!4 = !{!"SRV", i32 1, i32 0, i32 0, i32 -1, i32 0} +!5 = !{!"DescriptorTable", i32 0, !6} +!6 = !{!"Sampler", i32 5, i32 3, i32 2, i32 -1, i32 0} +!7 = !{!"DescriptorTable", i32 0, !8} +!8 = !{!"UAV", i32 -1, i32 0, i32 0, i32 -1, i32 2} diff --git a/llvm/test/CodeGen/DirectX/scalar-data.ll b/llvm/test/CodeGen/DirectX/scalar-data.ll index 4861a08..d9c8df9 100644 --- a/llvm/test/CodeGen/DirectX/scalar-data.ll +++ 
b/llvm/test/CodeGen/DirectX/scalar-data.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s +; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-unknown-shadermodel6.5-compute %s | FileCheck %s ; Make sure we don't touch arrays without vectors and that can recurse and flatten multiple-dimension arrays of vectors diff --git a/llvm/test/CodeGen/DirectX/umad.ll b/llvm/test/CodeGen/DirectX/umad.ll index 104d238..76516a2 100644 --- a/llvm/test/CodeGen/DirectX/umad.ll +++ b/llvm/test/CodeGen/DirectX/umad.ll @@ -1,17 +1,13 @@ -; RUN: opt -S -dxil-op-lower < %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s ; Make sure dxil operation function calls for round are generated for float and half. -; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]] -; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] -; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] - -; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}} target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-pc-shadermodel6.7-library" ; Function Attrs: noinline nounwind optnone define noundef i16 @umad_ushort(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 { entry: + ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]] %p2.addr = alloca i16, align 2 %p1.addr = alloca i16, align 2 %p0.addr = alloca i16, align 2 @@ -31,6 +27,7 @@ declare i16 @llvm.dx.umad.i16(i16, i16, i16) #1 ; Function Attrs: noinline nounwind optnone define noundef i32 @umad_uint(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 { entry: + ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] %p2.addr = alloca i32, align 4 %p1.addr = alloca i32, 
align 4 %p0.addr = alloca i32, align 4 @@ -50,6 +47,7 @@ declare i32 @llvm.dx.umad.i32(i32, i32, i32) #1 ; Function Attrs: noinline nounwind optnone define noundef i64 @umad_uint64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 { entry: + ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] %p2.addr = alloca i64, align 8 %p1.addr = alloca i64, align 8 %p0.addr = alloca i64, align 8 @@ -65,3 +63,95 @@ entry: ; Function Attrs: nocallback nofree nosync nounwind willreturn declare i64 @llvm.dx.umad.i64(i64, i64, i64) #1 + +; Function Attrs: noinline nounwind optnone +define noundef <4 x i16> @umad_uint16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 { +entry: + ; CHECK: extractelement <4 x i16> %p0, i64 0 + ; CHECK: extractelement <4 x i16> %p1, i64 0 + ; CHECK: extractelement <4 x i16> %p2, i64 0 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i16> %p0, i64 1 + ; CHECK: extractelement <4 x i16> %p1, i64 1 + ; CHECK: extractelement <4 x i16> %p2, i64 1 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i16> %p0, i64 2 + ; CHECK: extractelement <4 x i16> %p1, i64 2 + ; CHECK: extractelement <4 x i16> %p2, i64 2 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i16> %p0, i64 3 + ; CHECK: extractelement <4 x i16> %p1, i64 3 + ; CHECK: extractelement <4 x i16> %p2, i64 3 + ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]] + ; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0 + ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1 + ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2 + ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3 + %dx.umad = call <4 x i16> 
@llvm.dx.umad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2) + ret <4 x i16> %dx.umad +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x i16> @llvm.dx.umad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1 + +; Function Attrs: noinline nounwind optnone +define noundef <4 x i32> @umad_uint4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 { +entry: + ; CHECK: extractelement <4 x i32> %p0, i64 0 + ; CHECK: extractelement <4 x i32> %p1, i64 0 + ; CHECK: extractelement <4 x i32> %p2, i64 0 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i32> %p0, i64 1 + ; CHECK: extractelement <4 x i32> %p1, i64 1 + ; CHECK: extractelement <4 x i32> %p2, i64 1 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i32> %p0, i64 2 + ; CHECK: extractelement <4 x i32> %p1, i64 2 + ; CHECK: extractelement <4 x i32> %p2, i64 2 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i32> %p0, i64 3 + ; CHECK: extractelement <4 x i32> %p1, i64 3 + ; CHECK: extractelement <4 x i32> %p2, i64 3 + ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]] + ; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3 + %dx.umad = call <4 x i32> @llvm.dx.umad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2) + ret <4 x i32> %dx.umad +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x i32> @llvm.dx.umad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 + +; Function Attrs: noinline nounwind optnone +define noundef <4 x i64> @umad_uint64_t4(<4 x i64> noundef %p0, <4 x i64> noundef 
%p1, <4 x i64> noundef %p2) #0 { +entry: + ; CHECK: extractelement <4 x i64> %p0, i64 0 + ; CHECK: extractelement <4 x i64> %p1, i64 0 + ; CHECK: extractelement <4 x i64> %p2, i64 0 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i64> %p0, i64 1 + ; CHECK: extractelement <4 x i64> %p1, i64 1 + ; CHECK: extractelement <4 x i64> %p2, i64 1 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i64> %p0, i64 2 + ; CHECK: extractelement <4 x i64> %p1, i64 2 + ; CHECK: extractelement <4 x i64> %p2, i64 2 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: extractelement <4 x i64> %p0, i64 3 + ; CHECK: extractelement <4 x i64> %p1, i64 3 + ; CHECK: extractelement <4 x i64> %p2, i64 3 + ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]] + ; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0 + ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1 + ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2 + ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3 + %dx.umad = call <4 x i64> @llvm.dx.umad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2) + ret <4 x i64> %dx.umad +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x i64> @llvm.dx.umad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1 + +; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}} |