diff options
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/Analysis/DXILResource/buffer-frombinding.ll | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll | 17 | ||||
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll | 32 | ||||
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll | 264 | ||||
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll | 162 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/buffer-op-swz-operand.s | 43 | ||||
| -rw-r--r-- | llvm/test/Other/print-on-crash.ll | 2 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/fold-selective-shift.ll | 22 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll | 11 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/trunc-sext.ll | 16 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s | 245 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s | 45 |
13 files changed, 877 insertions, 20 deletions
diff --git a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll index ab1945d..d92010e 100644 --- a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll +++ b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll @@ -9,6 +9,7 @@ @Four.str = private unnamed_addr constant [5 x i8] c"Four\00", align 1 @Array.str = private unnamed_addr constant [6 x i8] c"Array\00", align 1 @Five.str = private unnamed_addr constant [5 x i8] c"Five\00", align 1 +@Six.str = private unnamed_addr constant [4 x i8] c"Six\00", align 1 @CB.str = private unnamed_addr constant [3 x i8] c"CB\00", align 1 @Constants.str = private unnamed_addr constant [10 x i8] c"Constants\00", align 1 @@ -137,6 +138,23 @@ define void @test_typedbuffer() { ; CHECK: Element Type: f32 ; CHECK: Element Count: 4 + %uav4 = call target("dx.TypedBuffer", double, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 5, i32 0, i32 1, i32 0, ptr @Six.str) + ; CHECK: Resource [[UAV4:[0-9]+]]: + ; CHECK: Name: Six + ; CHECK: Binding: + ; CHECK: Record ID: 4 + ; CHECK: Space: 5 + ; CHECK: Lower Bound: 0 + ; CHECK: Size: 1 + ; CHECK: Globally Coherent: 0 + ; CHECK: Counter Direction: Unknown + ; CHECK: Class: UAV + ; CHECK: Kind: Buffer + ; CHECK: IsROV: 0 + ; CHECK: Element Type: f64 (stored as u32) + ; CHECK: Element Count: 1 + %cb0 = call target("dx.CBuffer", {float}) @llvm.dx.resource.handlefrombinding(i32 1, i32 0, i32 1, i32 0, ptr @CB.str) ; CHECK: Resource [[CB0:[0-9]+]]: @@ -175,6 +193,7 @@ define void @test_typedbuffer() { ; CHECK-DAG: Call bound to [[UAV1]]: %uav1 = ; CHECK-DAG: Call bound to [[UAV2]]: %uav2_1 = ; CHECK-DAG: Call bound to [[UAV2]]: %uav2_2 = +; CHECK-DAG: Call bound to [[UAV4]]: %uav4 = ; CHECK-DAG: Call bound to [[CB0]]: %cb0 = ; CHECK-DAG: Call bound to [[CB1]]: %cb1 = diff --git a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll index a2059be..0062f90 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll @@ -22,14 +22,14 @@ target triple = "dxil-pc-shadermodel6.6-compute" ; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- --------- ; PRINT-NEXT:; Zero texture f16 buf T0 t0 1 ; PRINT-NEXT:; One texture f32 buf T1 t1 1 -; PRINT-NEXT:; Two texture f64 buf T2 t2 1 +; PRINT-NEXT:; Two texture u32 buf T2 t2 1 ; PRINT-NEXT:; Three texture i32 buf T3 t3 1 ; PRINT-NEXT:; Four texture byte r/o T4 t5 1 ; PRINT-NEXT:; Five texture struct r/o T5 t6 1 -; PRINT-NEXT:; Six texture u64 buf T6 t10,space2 1 +; PRINT-NEXT:; Six texture u32 buf T6 t10,space2 1 ; PRINT-NEXT:; Array texture f32 buf T7 t4,space3 100 -; PRINT-NEXT:; Array2 texture f64 buf T8 t2,space4 unbounded -; PRINT-NEXT:; Seven texture u64 buf T9 t20,space5 1 +; PRINT-NEXT:; Array2 texture u32 buf T8 t2,space4 unbounded +; PRINT-NEXT:; Seven texture u32 buf T9 t20,space5 1 ; define void @test() #0 { @@ -120,15 +120,14 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: ![[Half]] = !{i32 0, i32 8} ; CHECK: ![[One]] = !{i32 1, ptr @One, !"One", i32 0, i32 1, i32 1, i32 10, i32 0, ![[Float:[0-9]+]]} ; CHECK: ![[Float]] = !{i32 0, i32 9} -; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i32 0, ![[Double:[0-9]+]]} -; CHECK: ![[Double]] = !{i32 0, i32 10} +; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i32 0, ![[U32:[0-9]+]]} +; CHECK: ![[U32]] = !{i32 0, i32 5} ; CHECK: ![[Three]] = !{i32 3, ptr @Three, !"Three", i32 0, i32 3, i32 1, i32 10, i32 0, ![[I32:[0-9]+]]} ; CHECK: ![[I32]] = !{i32 0, i32 4} ; CHECK: ![[Four]] = !{i32 4, ptr @Four, !"Four", i32 0, i32 5, i32 1, i32 11, i32 0, null} ; CHECK: ![[Five]] = !{i32 5, ptr @Five, !"Five", i32 0, i32 6, i32 1, i32 12, i32 0, ![[FiveStride:[0-9]+]]} ; CHECK: ![[FiveStride]] = !{i32 1, i32 2} -; CHECK: ![[Six]] = !{i32 6, ptr @Six, !"Six", i32 2, i32 10, i32 1, i32 10, i32 0, ![[U64:[0-9]+]]} -; CHECK: ![[U64]] = !{i32 0, i32 7} +; CHECK: ![[Six]] = !{i32 6, ptr @Six, !"Six", i32 2, i32 10, i32 1, i32 10, i32 0, ![[U32:[0-9]+]]} ; CHECK: ![[Array]] = !{i32 7, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i32 0, ![[Float]]} -; CHECK: ![[Array2]] = !{i32 8, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i32 0, ![[Double]]} -; CHECK: ![[Seven]] = !{i32 9, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U64]]} +; CHECK: ![[Array2]] = !{i32 8, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i32 0, ![[U32]]} +; CHECK: ![[Seven]] = !{i32 9, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U32]]} diff --git a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll index 5b2b3ef..d377a52 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll @@ -25,17 +25,17 @@ target triple = "dxil-pc-shadermodel6.6-compute" ; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- --------- ; PRINT-NEXT:; Zero UAV f16 buf U0 u0 1 ; PRINT-NEXT:; One UAV f32 buf U1 u1 1 -; PRINT-NEXT:; Two UAV f64 buf U2 u2 1 +; PRINT-NEXT:; Two UAV u32 buf U2 u2 1 ; PRINT-NEXT:; Three UAV i32 buf U3 u3 1 ; PRINT-NEXT:; Four UAV byte r/w U4 u5 1 ; PRINT-NEXT:; Five UAV struct r/w U5 u6 1 ; PRINT-NEXT:; Six UAV i32 buf U6 u7 1 ; PRINT-NEXT:; Seven UAV struct r/w U7 u8 1 ; PRINT-NEXT:; Eight UAV byte r/w U8 u9 1 -; PRINT-NEXT:; Nine UAV u64 buf U9 u10,space2 1 +; PRINT-NEXT:; Nine UAV u32 buf U9 u10,space2 1 ; PRINT-NEXT:; Array UAV f32 buf U10 u4,space3 100 -; PRINT-NEXT:; Array2 UAV f64 buf U11 u2,space4 unbounded -; PRINT-NEXT:; Ten UAV u64 buf U12 u22,space5 1 +; PRINT-NEXT:; Array2 UAV u32 buf U11 u2,space4 unbounded +; PRINT-NEXT:; Ten UAV u32 buf U12 u22,space5 1 define void @test() #0 { ; RWBuffer<half4> Zero : register(u0) @@ -144,8 +144,8 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: ![[Half]] = !{i32 0, i32 8} ; CHECK: ![[One]] = !{i32 1, ptr @One, !"One", i32 0, i32 1, i32 1, i32 10, i1 false, i1 false, i1 false, ![[Float:[0-9]+]]} ; CHECK: ![[Float]] = !{i32 0, i32 9} -; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i1 false, i1 false, i1 false, ![[Double:[0-9]+]]} -; CHECK: ![[Double]] = !{i32 0, i32 10} +; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U32:[0-9]+]]} +; CHECK: ![[U32]] = !{i32 0, i32 5} ; CHECK: ![[Three]] = !{i32 3, ptr @Three, !"Three", i32 0, i32 3, i32 1, i32 10, i1 false, i1 false, i1 false, ![[I32:[0-9]+]]} ; CHECK: ![[I32]] = !{i32 0, i32 4} ; CHECK: ![[Four]] = !{i32 4, ptr @Four, !"Four", i32 0, i32 5, i32 1, i32 11, i1 false, i1 false, i1 false, null} @@ -155,8 +155,7 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" } ; CHECK: ![[Seven]] = !{i32 7, ptr @Seven, !"Seven", i32 0, i32 8, i32 1, i32 12, i1 false, i1 false, i1 true, ![[SevenStride:[0-9]+]]} ; CHECK: ![[SevenStride]] = !{i32 1, i32 16} ; CHECK: ![[Eight]] = !{i32 8, ptr @Eight, !"Eight", i32 0, i32 9, i32 1, i32 11, i1 false, i1 false, i1 true, null} -; CHECK: ![[Nine]] = !{i32 9, ptr @Nine, !"Nine", i32 2, i32 10, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]} -; CHECK: ![[U64]] = !{i32 0, i32 7} +; CHECK: ![[Nine]] = !{i32 9, ptr @Nine, !"Nine", i32 2, i32 10, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U32]]} ; CHECK: ![[Array]] = !{i32 10, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i1 false, i1 false, i1 false, ![[Float]]} -; CHECK: ![[Array2]] = !{i32 11, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i1 false, i1 false, i1 false, ![[Double]]} +; CHECK: ![[Array2]] = !{i32 11, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i1 false, i1 false, i1 false, ![[U32]]} ; CHECK: ![[Ten]] = !{i32 12, ptr @Ten, !"Ten", i32 5, i32 22, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll new file mode 100644 index 0000000..93fcd42 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +declare float @llvm.log2.f32(float) +declare double @llvm.log2.f64(double) + +define float @flog2_s(float %x) nounwind { +; LA32-LABEL: flog2_s: +; LA32: # %bb.0: +; LA32-NEXT: b log2f +; +; LA64-LABEL: flog2_s: +; LA64: # %bb.0: +; LA64-NEXT: pcaddu18i $t8, %call36(log2f) +; LA64-NEXT: jr $t8 + %y = call float @llvm.log2.f32(float %x) + ret float %y +} + +define double @flog2_d(double %x) nounwind { +; LA32-LABEL: flog2_d: +; LA32: # %bb.0: +; LA32-NEXT: b log2 +; +; LA64-LABEL: flog2_d: +; LA64: # %bb.0: +; LA64-NEXT: pcaddu18i $t8, %call36(log2) +; LA64-NEXT: jr $t8 + %y = call double @llvm.log2.f64(double %x) + ret double %y +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll new file mode 100644 index 0000000..68f2e3a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 + +declare <8 x float> @llvm.log2.v8f32(<8 x float>) +declare <4 x double> @llvm.log2.v4f64(<4 x double>) + +define void @flog2_v8f32(ptr %res, ptr %a) nounwind { +; LA32-LABEL: flog2_v8f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: xvpickve.w $xr0, $xr0, 5 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.w $xr0, $xr0, 4 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 +; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr0, $vr1, 16 +; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.w $xr0, $xr0, 6 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 32 +; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.w $xr0, $xr0, 7 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 48 +; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.w $xr0, $xr0, 1 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.w $xr0, $xr0, 0 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 +; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr0, $vr1, 16 +; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.w $xr0, $xr0, 2 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 32 +; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.w $xr0, $xr0, 3 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 48 +; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload +; LA32-NEXT: xvpermi.q $xr1, $xr0, 2 +; LA32-NEXT: xvst $xr1, $fp, 0 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; LA64-LABEL: flog2_v8f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -128 +; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: xvpickve.w $xr0, $xr0, 5 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.w $xr0, $xr0, 4 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 +; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr0, $vr1, 16 +; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.w $xr0, $xr0, 6 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 32 +; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.w $xr0, $xr0, 7 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 48 +; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.w $xr0, $xr0, 1 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.w $xr0, $xr0, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 +; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr0, $vr1, 16 +; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.w $xr0, $xr0, 2 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 32 +; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.w $xr0, $xr0, 3 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 48 +; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload +; LA64-NEXT: xvpermi.q $xr1, $xr0, 2 +; LA64-NEXT: xvst $xr1, $fp, 0 +; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 128 +; LA64-NEXT: ret +entry: + %v = load <8 x float>, ptr %a + %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v) + store <8 x float> %r, ptr %res + ret void +} + +define void @flog2_v4f64(ptr %res, ptr %a) nounwind { +; LA32-LABEL: flog2_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -112 +; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: xvpickve.d $xr0, $xr0, 3 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA32-NEXT: bl log2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.d $xr0, $xr0, 2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA32-NEXT: bl log2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload +; LA32-NEXT: vextrins.d $vr0, $vr1, 16 +; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.d $xr0, $xr0, 1 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA32-NEXT: bl log2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload +; LA32-NEXT: xvpickve.d $xr0, $xr0, 0 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA32-NEXT: bl log2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vextrins.d $vr0, $vr1, 16 +; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload +; LA32-NEXT: xvpermi.q $xr0, $xr1, 2 +; LA32-NEXT: xvst $xr0, $fp, 0 +; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 112 +; LA32-NEXT: ret +; +; LA64-LABEL: flog2_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -112 +; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: xvpickve.d $xr0, $xr0, 3 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.d $xr0, $xr0, 2 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload +; LA64-NEXT: vextrins.d $vr0, $vr1, 16 +; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.d $xr0, $xr0, 1 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload +; LA64-NEXT: xvpickve.d $xr0, $xr0, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vextrins.d $vr0, $vr1, 16 +; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload +; LA64-NEXT: xvpermi.q $xr0, $xr1, 2 +; LA64-NEXT: xvst $xr0, $fp, 0 +; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 112 +; LA64-NEXT: ret +entry: + %v = load <4 x double>, ptr %a + %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v) + store <4 x double> %r, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll new file mode 100644 index 0000000..e5e75ec --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64 + +declare <4 x float> @llvm.log2.v4f32(<4 x float>) +declare <2 x double> @llvm.log2.v2f64(<2 x double>) + +define void @flog2_v4f32(ptr %res, ptr %a) nounwind { +; LA32-LABEL: flog2_v4f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: vreplvei.w $vr0, $vr0, 1 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.w $vr0, $vr0, 0 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr0, $vr1, 16 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.w $vr0, $vr0, 2 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 32 +; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.w $vr0, $vr0, 3 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: bl log2f +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 48 +; LA32-NEXT: vst $vr1, $fp, 0 +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: flog2_v4f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: vreplvei.w $vr0, $vr0, 1 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.w $vr0, $vr0, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr0, $vr1, 16 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.w $vr0, $vr0, 2 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 32 +; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.w $vr0, $vr0, 3 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 48 +; LA64-NEXT: vst $vr1, $fp, 0 +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %v = load <4 x float>, ptr %a + %r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v) + store <4 x float> %r, ptr %res + ret void +} + +define void @flog2_v2f64(ptr %res, ptr %a) nounwind { +; LA32-LABEL: flog2_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA32-NEXT: bl log2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.d $vr0, $vr0, 0 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA32-NEXT: bl log2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vextrins.d $vr0, $vr1, 16 +; LA32-NEXT: vst $vr0, $fp, 0 +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: flog2_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA64-NEXT: pcaddu18i $ra, %call36(log2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vextrins.d $vr0, $vr1, 16 +; LA64-NEXT: vst $vr0, $fp, 0 +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %v = load <2 x double>, ptr %a + %r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v) + store <2 x double> %r, ptr %res + ret void +} diff --git a/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s new file mode 100644 index 0000000..8bd9148 --- /dev/null +++ b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s @@ -0,0 +1,43 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1100 --show-inst < %s | FileCheck %s + +// CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" +buffer_load_dwordx4 v[0:3], v0, s[0:3], 0, offen offset:4092 slc +// CHECK: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092 slc ; <MCInst #13135 BUFFER_LOAD_DWORDX4_OFFEN_gfx11 +// CHECK-NEXT: ; <MCOperand Reg:10104> +// CHECK-NEXT: ; <MCOperand Reg:486> +// CHECK-NEXT: ; <MCOperand Reg:7754> +// CHECK-NEXT: ; <MCOperand Imm:0> +// CHECK-NEXT: ; <MCOperand Imm:4092> +// CHECK-NEXT: ; <MCOperand Imm:2> +// CHECK-NEXT: ; <MCOperand Imm:0>> +buffer_store_dword v0, v1, s[0:3], 0 offen slc +// CHECK: buffer_store_b32 v0, v1, s[0:3], 0 offen slc ; <MCInst #14553 BUFFER_STORE_DWORD_OFFEN_gfx11 +// CHECK-NEXT: ; <MCOperand Reg:486> +// CHECK-NEXT: ; <MCOperand Reg:487> +// CHECK-NEXT: ; <MCOperand Reg:7754> +// CHECK-NEXT: ; <MCOperand Imm:0> +// CHECK-NEXT: ; <MCOperand Imm:0> +// CHECK-NEXT: ; <MCOperand Imm:2> +// CHECK-NEXT: ; <MCOperand Imm:0>> + +; tbuffer ops use autogenerate asm parsers +tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc +// CHECK: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc ; <MCInst #34095 TBUFFER_LOAD_FORMAT_XYZW_OFFEN_gfx11 +// CHECK-NEXT: ; <MCOperand Reg:10104> +// CHECK-NEXT: ; <MCOperand Reg:486> +// CHECK-NEXT: ; <MCOperand Reg:7754> +// CHECK-NEXT: ; <MCOperand Imm:0> +// CHECK-NEXT: ; <MCOperand Imm:4092> +// CHECK-NEXT: ; <MCOperand Imm:49> +// CHECK-NEXT: ; <MCOperand Imm:2> +// CHECK-NEXT: ; <MCOperand Imm:0>> +tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc +// CHECK: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc ; <MCInst #34264 TBUFFER_STORE_FORMAT_D16_X_OFFEN_gfx11 +// CHECK-NEXT: ; <MCOperand Reg:486> +// CHECK-NEXT: ; <MCOperand Reg:487> +// CHECK-NEXT: ; <MCOperand Reg:7754> +// CHECK-NEXT: ; <MCOperand Imm:0> +// CHECK-NEXT: ; <MCOperand Imm:0> +// CHECK-NEXT: ; <MCOperand Imm:33> +// CHECK-NEXT: ; <MCOperand Imm:2> +// CHECK-NEXT: ; <MCOperand Imm:0>> diff --git a/llvm/test/Other/print-on-crash.ll b/llvm/test/Other/print-on-crash.ll index 565da0c..f1e3414 100644 --- a/llvm/test/Other/print-on-crash.ll +++ b/llvm/test/Other/print-on-crash.ll @@ -17,7 +17,7 @@ ; RUN: not --crash opt -print-on-crash -print-module-scope -passes=trigger-crash-module -filter-passes=blah < %s 2>&1 | FileCheck %s --check-prefix=CHECK_FILTERED -; CHECK_SIMPLE: *** Dump of IR Before Last Pass {{.*}} Started *** +; CHECK_SIMPLE: ; *** Dump of IR Before Last Pass {{.*}} Started *** ; CHECK_SIMPLE: @main ; CHECK_SIMPLE: entry: ; CHECK_NO_CRASH-NOT: *** Dump of IR diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll index 2b22965..dcfd933 100644 --- a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll +++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll @@ -21,6 +21,28 @@ define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) { ret i16 %trunc } +; Will assert if InsertPoint is not set before creating an instruction +; with IRBuilder +define i16 @selective_shift_16_insertpt(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16_insertpt( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[ADD_ONE:%.*]] = add i16 [[SEL_V]], 1 +; CHECK-NEXT: ret i16 [[ADD_ONE]] +; + %mask.bit = and i32 %mask, 16 + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %sel = lshr i32 %pack, %mask.bit + %add.one = add i32 %sel, 1 + %trunc = trunc i32 %add.one to i16 + ret i16 %trunc +} + define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) { ; CHECK-LABEL: define i16 @selective_shift_16.commute( ; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { diff --git a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll index 516f1a2..4128a15 100644 --- a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll +++ b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll @@ -144,6 +144,17 @@ define i24 @wide_source_matching_signbits(i32 %x) { ret i24 %c } +define i32 @wide_source_matching_signbits_has_nsw_flag(i64 %i) { +; CHECK-LABEL: define i32 @wide_source_matching_signbits_has_nsw_flag( +; CHECK-SAME: i64 [[I:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = trunc nsw i64 [[I]] to i32 +; CHECK-NEXT: ret i32 [[A]] +; + %a = trunc nsw i64 %i to i16 + %b = sext i16 %a to i32 + ret i32 %b +} + ; negative test - not enough sign-bits define i24 @wide_source_not_matching_signbits(i32 %x) { diff --git a/llvm/test/Transforms/InstCombine/trunc-sext.ll b/llvm/test/Transforms/InstCombine/trunc-sext.ll new file mode 100644 index 0000000..ac143840 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/trunc-sext.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +target datalayout = "i16:16:16-i32:32:32-i64:64:64-n16:32:64" + +define i32 @test(i64 %i) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i64 [[I:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = trunc i64 [[I]] to i16 +; CHECK-NEXT: [[B:%.*]] = sext i16 [[A]] to i32 +; CHECK-NEXT: ret i32 [[B]] +; + %a = trunc i64 %i to i16 + %b = sext i16 %a to i32 + ret i32 %b +} diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s new file mode 100644 index 0000000..becd9d1 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s @@ -0,0 +1,245 @@ +## This test uses TU index for type parsing in dwp and makes sure the DWARF4 type is +## successfully retrieved. + +# RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o +# RUN: llvm-dwp -e test.o -o test.dwp +# RUN: llvm-dwarfdump test.dwp | FileCheck %s + +# Generated from: +# +# struct t1 { }; +# t1 v1; +# +# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.4.split.dwp.s -gdwarf-4 + +# CHECK: DW_TAG_variable +# CHECK: DW_AT_type ({{.*}} "t1") + .file "test.cpp" + .section .debug_types.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .quad -4149699470930386446 # Type Signature + .long 30 # Type DIE Offset + .byte 1 # Abbrev [1] 0x17:0xe DW_TAG_type_unit + .short 33 # DW_AT_language + .long 0 # DW_AT_stmt_list + .byte 2 # Abbrev [2] 0x1e:0x6 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .byte 1 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .file 1 "." "test.cpp" + .type v1,@object # @v1 + .bss + .globl v1 +v1: + .zero 1 + .size v1, 1 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .ascii "\260B" # DW_AT_GNU_dwo_name + .byte 14 # DW_FORM_strp + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .ascii "\263B" # DW_AT_GNU_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x19 DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lskel_string0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .long .Lskel_string1 # DW_AT_GNU_dwo_name + .quad 1388839634901268525 # DW_AT_GNU_dwo_id + .long .Laddr_table_base0 # DW_AT_GNU_addr_base +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." # string offset=0 +.Lskel_string1: + .asciz "test.dwo" # string offset=2 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "v1" # string offset=0 +.Linfo_string1: + .asciz "t1" # string offset=3 +.Linfo_string2: + .asciz "clang version 22.0.0" # string offset=6 +.Linfo_string3: + .asciz "test.cpp" # string offset=27 +.Linfo_string4: + .asciz "test.dwo" # string offset=36 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 3 + .long 6 + .long 27 + .long 36 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit +.Ldebug_info_dwo_start1: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 3 # Abbrev [3] 0xb:0x23 DW_TAG_compile_unit + .byte 2 # DW_AT_producer + .short 33 # DW_AT_language + .byte 3 # DW_AT_name + .byte 4 # DW_AT_GNU_dwo_name + .quad 1388839634901268525 # DW_AT_GNU_dwo_id + .byte 4 # Abbrev [4] 0x19:0xb DW_TAG_variable + .byte 0 # DW_AT_name + .long 36 # DW_AT_type + # DW_AT_external + .byte 1 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 0 + .byte 5 # Abbrev [5] 0x24:0x9 DW_TAG_structure_type + # DW_AT_declaration + .quad -4149699470930386446 # DW_AT_signature + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end1: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 65 # DW_TAG_type_unit + .byte 1 # DW_CHILDREN_yes + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 19 # DW_TAG_structure_type + .byte 0 # DW_CHILDREN_no + .byte 54 # DW_AT_calling_convention + .byte 11 # DW_FORM_data1 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\260B" # DW_AT_GNU_dwo_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 19 # DW_TAG_structure_type + .byte 0 # DW_CHILDREN_no + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 105 # DW_AT_signature + .byte 32 # DW_FORM_ref_sig8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_line.dwo,"e",@progbits +.Ltmp0: + .long .Ldebug_line_end0-.Ldebug_line_start0 # unit length +.Ldebug_line_start0: + .short 4 + .long .Lprologue_end0-.Lprologue_start0 +.Lprologue_start0: + .byte 1 + .byte 1 + .byte 1 + .byte -5 + .byte 14 + .byte 1 + .byte 0 + .ascii "test.cpp" + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 +.Lprologue_end0: +.Ldebug_line_end0: + .section .debug_addr,"",@progbits +.Laddr_table_base0: + .quad v1 + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 36 # Compilation Unit Length + .long 25 # DIE offset + .byte 32 # Attributes: VARIABLE, EXTERNAL + .asciz "v1" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 36 # Compilation Unit Length + .long 36 # DIE offset + .byte 16 # Attributes: TYPE, EXTERNAL + .asciz "t1" # External Name + .long 0 # End Mark +.LpubTypes_end0: + .ident "clang version 22.0.0" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s b/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s new file mode 100644 index 0000000..932d9d1 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s + +buffer_load_dwordx4 v[30:33], v4, s[0:3], 0, offen offset:4092 +buffer_store_dword v0, v1, s[0:3], 0 offen + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 200 +# CHECK-NEXT: Total Cycles: 280 +# CHECK-NEXT: Total uOps: 200 + +# CHECK: Dispatch Width: 1 +# CHECK-NEXT: uOps Per Cycle: 0.71 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 80 1.00 * U buffer_load_dwordx4 v[30:33], v4, s[0:3], 0 offen offset:4092 +# CHECK-NEXT: 1 80 1.00 * U buffer_store_dword v0, v1, s[0:3], 0 offen + +# CHECK: Resources: +# CHECK-NEXT: [0] - HWBranch +# CHECK-NEXT: [1] - HWExport +# CHECK-NEXT: [2] - HWLGKM +# CHECK-NEXT: [3] - HWSALU +# CHECK-NEXT: [4] - HWVALU +# CHECK-NEXT: [5] - HWVMEM +# CHECK-NEXT: [6] - HWXDL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] +# CHECK-NEXT: - - - - - 2.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: - - - - - 1.00 - buffer_load_dwordx4 v[30:33], v4, s[0:3], 0 offen offset:4092 +# CHECK-NEXT: - - - - - 1.00 - buffer_store_dword v0, v1, s[0:3], 0 offen |
