diff options
Diffstat (limited to 'clang/test/CodeGenOpenCL')
-rw-r--r-- | clang/test/CodeGenOpenCL/amdgpu-features.cl | 2 | ||||
-rw-r--r-- | clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl | 66 | ||||
-rw-r--r-- | clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl | 19 |
3 files changed, 86 insertions, 1 deletions
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 75e9710..e96dd66 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -108,7 +108,7 @@ // GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+wavefrontsize32 +// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32 // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl new file mode 100644 index 0000000..f2552d4 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl @@ -0,0 +1,66 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 + +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v4i __attribute__((ext_vector_type(4))); + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 1) +// CHECK-GFX1250-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_global_load_monitor_b32(global int* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b32(inptr, 1); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b64( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.global.load.monitor.b64.v2i32(ptr addrspace(1) [[INPTR:%.*]], i32 10) +// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]] +// +v2i test_amdgcn_global_load_monitor_b64(global v2i* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b64(inptr, 10); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b128( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.monitor.b128.v4i32(ptr addrspace(1) [[INPTR:%.*]], i32 22) +// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]] +// +v4i test_amdgcn_global_load_monitor_b128(global v4i* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b128(inptr, 22); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b32( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.flat.load.monitor.b32.i32(ptr [[INPTR:%.*]], i32 27) +// CHECK-GFX1250-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_flat_load_monitor_b32(int* inptr) +{ + return __builtin_amdgcn_flat_load_monitor_b32(inptr, 27); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b64( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.flat.load.monitor.b64.v2i32(ptr [[INPTR:%.*]], i32 1) +// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]] +// +v2i test_amdgcn_flat_load_monitor_b64(v2i* inptr) +{ + return __builtin_amdgcn_flat_load_monitor_b64(inptr, 1); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b128( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.flat.load.monitor.b128.v4i32(ptr [[INPTR:%.*]], i32 0) +// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]] +// +v4i test_amdgcn_flat_load_monitor_b128(v4i* inptr) +{ + return __builtin_amdgcn_flat_load_monitor_b128(inptr, 0); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index a21862c..81f39f9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -440,6 +440,25 @@ void test_permlane16_swap(global uint2* out, uint old, uint src) { *out = __builtin_amdgcn_permlane16_swap(old, src, false, true); } +// CHECK-LABEL: @test_prefetch( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FPTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[GPTR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR_ADDR]] to ptr +// CHECK-NEXT: [[GPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GPTR_ADDR]] to ptr +// CHECK-NEXT: store ptr [[FPTR:%.*]], ptr [[FPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(1) [[GPTR:%.*]], ptr [[GPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @llvm.amdgcn.flat.prefetch(ptr [[TMP0]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) [[TMP1]], i32 8) +// CHECK-NEXT: ret void +// +void test_prefetch(generic void *fptr, global void *gptr) { + __builtin_amdgcn_flat_prefetch(fptr, 0); + __builtin_amdgcn_global_prefetch(gptr, 8); +} + // CHECK-LABEL: @test_cvt_f32_fp8_e5m3( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |