diff options
Diffstat (limited to 'clang/test/CodeGenOpenCL')
6 files changed, 186 insertions, 2 deletions
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl index d23e6f2..5b76cff 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple amdgcn -target-feature +gws -o /dev/null %s 2>&1 \ +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -target-feature +gws -o /dev/null %s 2>&1 \ // RUN: | FileCheck --check-prefix=GWS %s // GWS: warning: feature flag '+gws' is ignored since the feature is read only [-Winvalid-command-line-argument] diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 75e9710..e96dd66 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -108,7 +108,7 @@ // GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+wavefrontsize32 +// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32 // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl new file mode 100644 index 0000000..1542efa --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl @@ -0,0 +1,16 @@ +// REQUIRES: amdgpu-registered-target + +// Check the readonly feature will can be written to the IR +// if there is no target specified. + +// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s + +__attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {} + +// NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts" +// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl index e3fe31f..ccc05f0 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl @@ -2,6 +2,89 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v4i __attribute__((ext_vector_type(4))); + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_load_async_to_lds_b8( global char* gaddr, local char* laddr) +{ + __builtin_amdgcn_global_load_async_to_lds_b8(gaddr, laddr, 16, 0); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b32( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_load_async_to_lds_b32(global int* gaddr, local int* laddr) +{ + __builtin_amdgcn_global_load_async_to_lds_b32(gaddr, laddr, 16, 0); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b64( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_load_async_to_lds_b64(global v2i* gaddr, local v2i* laddr) +{ + __builtin_amdgcn_global_load_async_to_lds_b64(gaddr, laddr, 16, 0); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b128( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_load_async_to_lds_b128( global v4i* gaddr, local v4i* laddr) +{ + __builtin_amdgcn_global_load_async_to_lds_b128(gaddr, laddr, 16, 0); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_store_async_from_lds_b8(global char* gaddr, local char* laddr) +{ + __builtin_amdgcn_global_store_async_from_lds_b8(gaddr, laddr, 16, 0); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b32( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_store_async_from_lds_b32(global int* gaddr, local int* laddr) +{ + __builtin_amdgcn_global_store_async_from_lds_b32(gaddr, laddr, 16, 0); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b64( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_store_async_from_lds_b64(global v2i* gaddr, local v2i* laddr) +{ + __builtin_amdgcn_global_store_async_from_lds_b64(gaddr, laddr, 16, 0); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b128( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0) +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_global_store_async_from_lds_b128(global v4i* gaddr, local v4i* laddr) +{ + __builtin_amdgcn_global_store_async_from_lds_b128(gaddr, laddr, 16, 0); +} + // CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_async_barrier_arrive_b64( // CHECK-GFX1250-NEXT: entry: // CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) [[ADDR:%.*]]) diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl new file mode 100644 index 0000000..f2552d4 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl @@ -0,0 +1,66 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 + +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v4i __attribute__((ext_vector_type(4))); + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 1) +// CHECK-GFX1250-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_global_load_monitor_b32(global int* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b32(inptr, 1); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b64( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.global.load.monitor.b64.v2i32(ptr addrspace(1) [[INPTR:%.*]], i32 10) +// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]] +// +v2i test_amdgcn_global_load_monitor_b64(global v2i* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b64(inptr, 10); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b128( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.monitor.b128.v4i32(ptr addrspace(1) [[INPTR:%.*]], i32 22) +// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]] +// +v4i test_amdgcn_global_load_monitor_b128(global v4i* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b128(inptr, 22); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b32( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.flat.load.monitor.b32.i32(ptr [[INPTR:%.*]], i32 27) +// CHECK-GFX1250-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_flat_load_monitor_b32(int* inptr) +{ + return __builtin_amdgcn_flat_load_monitor_b32(inptr, 27); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b64( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.flat.load.monitor.b64.v2i32(ptr [[INPTR:%.*]], i32 1) +// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]] +// +v2i test_amdgcn_flat_load_monitor_b64(v2i* inptr) +{ + return __builtin_amdgcn_flat_load_monitor_b64(inptr, 1); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b128( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.flat.load.monitor.b128.v4i32(ptr [[INPTR:%.*]], i32 0) +// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]] +// +v4i test_amdgcn_flat_load_monitor_b128(v4i* inptr) +{ + return __builtin_amdgcn_flat_load_monitor_b128(inptr, 0); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index a21862c..81f39f9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -440,6 +440,25 @@ void test_permlane16_swap(global uint2* out, uint old, uint src) { *out = __builtin_amdgcn_permlane16_swap(old, src, false, true); } +// CHECK-LABEL: @test_prefetch( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FPTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[GPTR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR_ADDR]] to ptr +// CHECK-NEXT: [[GPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GPTR_ADDR]] to ptr +// CHECK-NEXT: store ptr [[FPTR:%.*]], ptr [[FPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(1) [[GPTR:%.*]], ptr [[GPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @llvm.amdgcn.flat.prefetch(ptr [[TMP0]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GPTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) [[TMP1]], i32 8) +// CHECK-NEXT: ret void +// +void test_prefetch(generic void *fptr, global void *gptr) { + __builtin_amdgcn_flat_prefetch(fptr, 0); + __builtin_amdgcn_global_prefetch(gptr, 8); +} + // CHECK-LABEL: @test_cvt_f32_fp8_e5m3( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |