aboutsummaryrefslogtreecommitdiff
path: root/clang/test/CodeGenOpenCL
diff options
context:
space:
mode:
Diffstat (limited to 'clang/test/CodeGenOpenCL')
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-features.cl2
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl66
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl19
3 files changed, 86 insertions, 1 deletions
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 75e9710..e96dd66 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -108,7 +108,7 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+wavefrontsize32
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
new file mode 100644
index 0000000..f2552d4
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
@@ -0,0 +1,66 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250
+
+typedef int v2i __attribute__((ext_vector_type(2)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 1)
+// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
+//
+int test_amdgcn_global_load_monitor_b32(global int* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b32(inptr, 1);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b64(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.global.load.monitor.b64.v2i32(ptr addrspace(1) [[INPTR:%.*]], i32 10)
+// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]]
+//
+v2i test_amdgcn_global_load_monitor_b64(global v2i* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b64(inptr, 10);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b128(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.monitor.b128.v4i32(ptr addrspace(1) [[INPTR:%.*]], i32 22)
+// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i test_amdgcn_global_load_monitor_b128(global v4i* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b128(inptr, 22);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b32(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.flat.load.monitor.b32.i32(ptr [[INPTR:%.*]], i32 27)
+// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
+//
+int test_amdgcn_flat_load_monitor_b32(int* inptr)
+{
+ return __builtin_amdgcn_flat_load_monitor_b32(inptr, 27);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b64(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.flat.load.monitor.b64.v2i32(ptr [[INPTR:%.*]], i32 1)
+// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]]
+//
+v2i test_amdgcn_flat_load_monitor_b64(v2i* inptr)
+{
+ return __builtin_amdgcn_flat_load_monitor_b64(inptr, 1);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b128(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.flat.load.monitor.b128.v4i32(ptr [[INPTR:%.*]], i32 0)
+// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i test_amdgcn_flat_load_monitor_b128(v4i* inptr)
+{
+ return __builtin_amdgcn_flat_load_monitor_b128(inptr, 0);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index a21862c..81f39f9 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -440,6 +440,25 @@ void test_permlane16_swap(global uint2* out, uint old, uint src) {
*out = __builtin_amdgcn_permlane16_swap(old, src, false, true);
}
+// CHECK-LABEL: @test_prefetch(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[FPTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[GPTR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[FPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR_ADDR]] to ptr
+// CHECK-NEXT: [[GPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GPTR_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[FPTR:%.*]], ptr [[FPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr addrspace(1) [[GPTR:%.*]], ptr [[GPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @llvm.amdgcn.flat.prefetch(ptr [[TMP0]], i32 0)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) [[TMP1]], i32 8)
+// CHECK-NEXT: ret void
+//
+void test_prefetch(generic void *fptr, global void *gptr) {
+ __builtin_amdgcn_flat_prefetch(fptr, 0);
+ __builtin_amdgcn_global_prefetch(gptr, 8);
+}
+
// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)